From e64ded573f6e9428943ead9bff606f9e1672f4d7 Mon Sep 17 00:00:00 2001 From: schmikei Date: Mon, 17 Nov 2025 15:21:19 -0500 Subject: [PATCH 1/2] modernize the mongodbatlas mixin --- mongodb-atlas-mixin/README.md | 40 +- .../{alerts => }/alerts.libsonnet | 76 +- mongodb-atlas-mixin/config.libsonnet | 53 +- mongodb-atlas-mixin/dashboards.libsonnet | 176 ++ .../dashboards/dashboards.libsonnet | 5 - .../mongodb-atlas-cluster-overview.libsonnet | 2159 ----------------- ...mongodb-atlas-elections-overview.libsonnet | 920 ------- ...ongodb-atlas-operations-overview.libsonnet | 1338 ---------- ...ngodb-atlas-performance-overview.libsonnet | 833 ------- .../mongodb-atlas-sharding-overview.libsonnet | 1335 ---------- .../mongodb-atlas-cluster-overview.json | 1796 +++++--------- .../mongodb-atlas-elections-overview.json | 957 +++----- .../mongodb-atlas-operations-overview.json | 1413 ++++------- .../mongodb-atlas-performance-overview.json | 902 +++---- .../mongodb-atlas-sharding-overview.json | 1113 +++++++++ mongodb-atlas-mixin/g.libsonnet | 3 + mongodb-atlas-mixin/jsonnetfile.json | 44 +- mongodb-atlas-mixin/links.libsonnet | 31 + mongodb-atlas-mixin/main.libsonnet | 49 + mongodb-atlas-mixin/mixin.libsonnet | 34 +- mongodb-atlas-mixin/panels.libsonnet | 1448 +++++++++++ .../prometheus_alerts.yaml | 22 +- mongodb-atlas-mixin/rows.libsonnet | 336 +++ mongodb-atlas-mixin/signals/cluster.libsonnet | 503 ++++ .../signals/elections.libsonnet | 207 ++ .../signals/operations.libsonnet | 733 ++++++ .../signals/performance.libsonnet | 738 ++++++ .../signals/sharding.libsonnet | 458 ++++ 28 files changed, 7595 insertions(+), 10127 deletions(-) rename mongodb-atlas-mixin/{alerts => }/alerts.libsonnet (83%) create mode 100644 mongodb-atlas-mixin/dashboards.libsonnet delete mode 100644 mongodb-atlas-mixin/dashboards/dashboards.libsonnet delete mode 100644 mongodb-atlas-mixin/dashboards/mongodb-atlas-cluster-overview.libsonnet delete mode 100644 
mongodb-atlas-mixin/dashboards/mongodb-atlas-elections-overview.libsonnet delete mode 100644 mongodb-atlas-mixin/dashboards/mongodb-atlas-operations-overview.libsonnet delete mode 100644 mongodb-atlas-mixin/dashboards/mongodb-atlas-performance-overview.libsonnet delete mode 100644 mongodb-atlas-mixin/dashboards/mongodb-atlas-sharding-overview.libsonnet create mode 100644 mongodb-atlas-mixin/dashboards_out/mongodb-atlas-sharding-overview.json create mode 100644 mongodb-atlas-mixin/g.libsonnet create mode 100644 mongodb-atlas-mixin/links.libsonnet create mode 100644 mongodb-atlas-mixin/main.libsonnet create mode 100644 mongodb-atlas-mixin/panels.libsonnet create mode 100644 mongodb-atlas-mixin/rows.libsonnet create mode 100644 mongodb-atlas-mixin/signals/cluster.libsonnet create mode 100644 mongodb-atlas-mixin/signals/elections.libsonnet create mode 100644 mongodb-atlas-mixin/signals/operations.libsonnet create mode 100644 mongodb-atlas-mixin/signals/performance.libsonnet create mode 100644 mongodb-atlas-mixin/signals/sharding.libsonnet diff --git a/mongodb-atlas-mixin/README.md b/mongodb-atlas-mixin/README.md index 3ac7a0e0c..3413018c3 100644 --- a/mongodb-atlas-mixin/README.md +++ b/mongodb-atlas-mixin/README.md @@ -11,18 +11,18 @@ The MongoDB Atlas mixin contains the following dashboards: and the following alerts: -- MongoDBAtlasHighNumberOfSlowNetworkRequests +- MongoDBAtlasCollExclusiveDeadlocks +- MongoDBAtlasCollIntentExclDeadlocks +- MongoDBAtlasCollSharedDeadlocks +- MongoDBAtlasCollIntentSharedDeadlocks +- MongoDBAtlasDBExclusiveDeadlocks +- MongoDBAtlasDBIntentExclDeadlocks +- MongoDBAtlasDBSharedDeadlocks +- MongoDBAtlasDBIntentSharedDeadlocks +- MongoDBAtlasSlowNetworkRequests - MongoDBAtlasDiskSpaceLow - MongoDBAtlasSlowHardwareIO -- MongoDBAtlasHighNumberOfTimeoutElections -- MongoDBAtlasHighNumberOfCollectionExclusiveDeadlocks -- MongoDBAtlasHighNumberOfCollectionIntentExclusiveDeadlocks -- MongoDBAtlasHighNumberOfCollectionSharedDeadlocks -- 
MongoDBAtlasHighNumberOfCollectionIntentSharedDeadlocks -- MongoDBAtlasHighNumberOfDatabaseExclusiveDeadlocks -- MongoDBAtlasHighNumberOfDatabaseIntentExclusiveDeadlocks -- MongoDBAtlasHighNumberOfDatabaseSharedDeadlocks -- MongoDBAtlasHighNumberOfDatabaseIntentSharedDeadlocks +- MongoDBAtlasElectionTimeouts **Please note:** - Some metrics may be reset if the MongoDB Atlas cluster is ever reset. @@ -78,18 +78,18 @@ This mixin includes the MongoDB Atlas sharding overview dashboard, however the m ## Alerts overview -- MongoDBAtlasHighNumberOfSlowNetworkRequests: There is a high number of slow network requests. +- MongoDBAtlasCollExclusiveDeadlocks: There is a high number of collection exclusive deadlocks occurring. +- MongoDBAtlasCollIntentExclDeadlocks: There is a high number of collection intent-exclusive deadlocks occurring. +- MongoDBAtlasCollSharedDeadlocks: There is a high number of collection shared deadlocks occurring. +- MongoDBAtlasCollIntentSharedDeadlocks: There is a high number of collection intent-shared deadlocks occurring. +- MongoDBAtlasDBExclusiveDeadlocks: There is a high number of database exclusive deadlocks occurring. +- MongoDBAtlasDBIntentExclDeadlocks: There is a high number of database intent-exclusive deadlocks occurring. +- MongoDBAtlasDBSharedDeadlocks: There is a high number of database shared deadlocks occurring. +- MongoDBAtlasDBIntentSharedDeadlocks: There is a high number of database intent-shared deadlocks occurring. +- MongoDBAtlasSlowNetworkRequests: There is a high number of slow network requests. - MongoDBAtlasDiskSpaceLow: Hardware is running out of disk space. - MongoDBAtlasSlowHardwareIO: Read and write I/Os are taking too long to complete. -- MongoDBAtlasHighNumberOfTimeoutElections: There is a high number of elections being called due to the primary node timing out. -- MongoDBAtlasHighNumberOfCollectionExclusiveDeadlocks: There is a high number of collection exclusive-lock deadlocks. 
-- MongoDBAtlasHighNumberOfCollectionIntentExclusiveDeadlocks: There is a high number of collection intent-exclusive-lock deadlocks. -- MongoDBAtlasHighNumberOfCollectionSharedDeadlocks: There is a high number of collection shared-lock deadlocks. -- MongoDBAtlasHighNumberOfCollectionIntentSharedDeadlocks: There is a high number of collection intent-shared-lock deadlocks. -- MongoDBAtlasHighNumberOfDatabaseExclusiveDeadlocks: There is a high number of database exclusive-lock deadlocks. -- MongoDBAtlasHighNumberOfDatabaseIntentExclusiveDeadlocks: There is a high number of database intent-exclusive-lock deadlocks. -- MongoDBAtlasHighNumberOfDatabaseSharedDeadlocks: There is a high number of database shared-lock deadlocks. -- MongoDBAtlasHighNumberOfDatabaseIntentSharedDeadlocks: There is a high number of database intent-shared-lock deadlocks. +- MongoDBAtlasElectionTimeouts: There is a high number of elections being called due to the primary node timing out. Default thresholds can be configured in `config.libsonnet`. 
```js diff --git a/mongodb-atlas-mixin/alerts/alerts.libsonnet b/mongodb-atlas-mixin/alerts.libsonnet similarity index 83% rename from mongodb-atlas-mixin/alerts/alerts.libsonnet rename to mongodb-atlas-mixin/alerts.libsonnet index 9cf658b9d..0c85b0ed5 100644 --- a/mongodb-atlas-mixin/alerts/alerts.libsonnet +++ b/mongodb-atlas-mixin/alerts.libsonnet @@ -1,14 +1,14 @@ { - prometheusAlerts+:: { - groups+: [ + new(this): { + groups: [ { - name: 'mongodb-atlas-alerts', + name: this.config.uid + '-alerts', rules: [ { - alert: 'MongoDBAtlasHighNumberOfCollectionExclusiveDeadlocks', + alert: 'MongoDBAtlasCollExclusiveDeadlocks', expr: ||| sum without(cl_role,process_port,rs_nm,rs_state) (increase(mongodb_locks_Collection_deadlockCount_W[5m])) > %(alertsDeadlocks)s - ||| % $._config, + ||| % this.config, 'for': '5m', labels: { severity: 'warning', @@ -18,14 +18,14 @@ description: ( 'The number of collection exclusive-lock deadlocks occurring on node {{$labels.instance}} in cluster {{$labels.cl_name}} is {{printf "%%.0f" $value}} which is above the threshold of %(alertsDeadlocks)s.' - ) % $._config, + ) % this.config, }, }, { - alert: 'MongoDBAtlasHighNumberOfCollectionIntentExclusiveDeadlocks', + alert: 'MongoDBAtlasCollIntentExclDeadlocks', expr: ||| sum without(cl_role,process_port,rs_nm,rs_state) (increase(mongodb_locks_Collection_deadlockCount_w[5m])) > %(alertsDeadlocks)s - ||| % $._config, + ||| % this.config, 'for': '5m', labels: { severity: 'warning', @@ -35,14 +35,14 @@ description: ( 'The number of collection intent-exclusive-lock deadlocks occurring on node {{$labels.instance}} in cluster {{$labels.cl_name}} is {{printf "%%.0f" $value}} which is above the threshold of %(alertsDeadlocks)s.' 
- ) % $._config, + ) % this.config, }, }, { - alert: 'MongoDBAtlasHighNumberOfCollectionSharedDeadlocks', + alert: 'MongoDBAtlasCollSharedDeadlocks', expr: ||| sum without(cl_role,process_port,rs_nm,rs_state) (increase(mongodb_locks_Collection_deadlockCount_R[5m])) > %(alertsDeadlocks)s - ||| % $._config, + ||| % this.config, 'for': '5m', labels: { severity: 'warning', @@ -52,14 +52,14 @@ description: ( 'The number of collection shared-lock deadlocks occurring on node {{$labels.instance}} in cluster {{$labels.cl_name}} is {{printf "%%.0f" $value}} which is above the threshold of %(alertsDeadlocks)s.' - ) % $._config, + ) % this.config, }, }, { - alert: 'MongoDBAtlasHighNumberOfCollectionIntentSharedDeadlocks', + alert: 'MongoDBAtlasCollIntentSharedDeadlocks', expr: ||| sum without(cl_role,process_port,rs_nm,rs_state) (increase(mongodb_locks_Collection_deadlockCount_r[5m])) > %(alertsDeadlocks)s - ||| % $._config, + ||| % this.config, 'for': '5m', labels: { severity: 'warning', @@ -69,14 +69,14 @@ description: ( 'The number of collection intent-shared-lock deadlocks occurring on node {{$labels.instance}} in cluster {{$labels.cl_name}} is {{printf "%%.0f" $value}} which is above the threshold of %(alertsDeadlocks)s.' - ) % $._config, + ) % this.config, }, }, { - alert: 'MongoDBAtlasHighNumberOfDatabaseExclusiveDeadlocks', + alert: 'MongoDBAtlasDBExclusiveDeadlocks', expr: ||| sum without(cl_role,process_port,rs_nm,rs_state) (increase(mongodb_locks_Database_deadlockCount_W[5m])) > %(alertsDeadlocks)s - ||| % $._config, + ||| % this.config, 'for': '5m', labels: { severity: 'warning', @@ -86,14 +86,14 @@ description: ( 'The number of database exclusive-lock deadlocks occurring on node {{$labels.instance}} in cluster {{$labels.cl_name}} is {{printf "%%.0f" $value}} which is above the threshold of %(alertsDeadlocks)s.' 
- ) % $._config, + ) % this.config, }, }, { - alert: 'MongoDBAtlasHighNumberOfDatabaseIntentExclusiveDeadlocks', + alert: 'MongoDBAtlasDBIntentExclDeadlocks', expr: ||| sum without(cl_role,process_port,rs_nm,rs_state) (increase(mongodb_locks_Database_deadlockCount_w[5m])) > %(alertsDeadlocks)s - ||| % $._config, + ||| % this.config, 'for': '5m', labels: { severity: 'warning', @@ -103,14 +103,14 @@ description: ( 'The number of database intent-exclusive-lock deadlocks occurring on node {{$labels.instance}} in cluster {{$labels.cl_name}} is {{printf "%%.0f" $value}} which is above the threshold of %(alertsDeadlocks)s.' - ) % $._config, + ) % this.config, }, }, { - alert: 'MongoDBAtlasHighNumberOfDatabaseSharedDeadlocks', + alert: 'MongoDBAtlasDBSharedDeadlocks', expr: ||| sum without(cl_role,process_port,rs_nm,rs_state) (increase(mongodb_locks_Database_deadlockCount_R[5m])) > %(alertsDeadlocks)s - ||| % $._config, + ||| % this.config, 'for': '5m', labels: { severity: 'warning', @@ -120,14 +120,14 @@ description: ( 'The number of database shared-lock deadlocks occurring on node {{$labels.instance}} in cluster {{$labels.cl_name}} is {{printf "%%.0f" $value}} which is above the threshold of %(alertsDeadlocks)s.' - ) % $._config, + ) % this.config, }, }, { - alert: 'MongoDBAtlasHighNumberOfDatabaseIntentSharedDeadlocks', + alert: 'MongoDBAtlasDBIntentSharedDeadlocks', expr: ||| sum without(cl_role,process_port,rs_nm,rs_state) (increase(mongodb_locks_Database_deadlockCount_r[5m])) > %(alertsDeadlocks)s - ||| % $._config, + ||| % this.config, 'for': '5m', labels: { severity: 'warning', @@ -137,14 +137,14 @@ description: ( 'The number of database intent-shared-lock deadlocks occurring on node {{$labels.instance}} in cluster {{$labels.cl_name}} is {{printf "%%.0f" $value}} which is above the threshold of %(alertsDeadlocks)s.' 
- ) % $._config, + ) % this.config, }, }, { - alert: 'MongoDBAtlasHighNumberOfSlowNetworkRequests', + alert: 'MongoDBAtlasSlowNetworkRequests', expr: ||| sum without (cl_role,rs_nm,rs_state,process_port) (increase(mongodb_network_numSlowSSLOperations[5m])) + sum without (cl_role,rs_nm,rs_state,process_port) (increase(mongodb_network_numSlowDNSOperations[5m])) > %(alertsSlowNetworkRequests)s - ||| % $._config, + ||| % this.config, 'for': '5m', labels: { severity: 'warning', @@ -154,14 +154,14 @@ description: ( 'The number of DNS and SSL operations taking more than 1 second to complete on node {{$labels.instance}} in cluster {{$labels.cl_name}} is {{printf "%%.0f" $value}} which is above the threshold of %(alertsSlowNetworkRequests)s.' - ) % $._config, + ) % this.config, }, }, { alert: 'MongoDBAtlasDiskSpaceLow', expr: ||| 100 * ((sum without (disk_name) (hardware_disk_metrics_disk_space_used_bytes)) / clamp_min((sum without (disk_name) (hardware_disk_metrics_disk_space_used_bytes)) + (sum without (disk_name) (hardware_disk_metrics_disk_space_free_bytes)), 1)) > %(alertsHighDiskUsage)s - ||| % $._config, + ||| % this.config, 'for': '5m', labels: { severity: 'warning', @@ -171,14 +171,14 @@ description: ( 'The amount of hardware disk space being used on node {{$labels.instance}} in cluster {{$labels.cl_name}} is {{printf "%%.0f" $value}}%% which is above the threshold of %(alertsHighDiskUsage)s%%.' 
- ) % $._config, + ) % this.config, }, }, { alert: 'MongoDBAtlasSlowHardwareIO', expr: ||| (sum without (disk_name) (increase(hardware_disk_metrics_read_time_milliseconds[5m])) + sum without (disk_name) (increase(hardware_disk_metrics_write_time_milliseconds[5m]))) / 1000 > %(alertsSlowHardwareIO)s - ||| % $._config, + ||| % this.config, 'for': '5m', labels: { severity: 'warning', @@ -188,14 +188,14 @@ description: ( 'The latency time for read and write I/Os on node {{$labels.instance}} in cluster {{$labels.cl_name}} is {{printf "%%.0f" $value}} seconds which is above the threshold of %(alertsSlowHardwareIO)s seconds.' - ) % $._config, + ) % this.config, }, }, { - alert: 'MongoDBAtlasHighNumberOfTimeoutElections', + alert: 'MongoDBAtlasElectionTimeouts', expr: ||| sum without (cl_role,process_port,instance,rs_state) (increase(mongodb_electionMetrics_electionTimeout_called[5m])) > %(alertsHighTimeoutElections)s - ||| % $._config, + ||| % this.config, 'for': '5m', labels: { severity: 'warning', @@ -204,8 +204,8 @@ summary: 'There is a high number of elections being called due to the primary node timing out.', description: ( - 'The number of elections being called due to the primary node timing out in replica set {{$labels.rs_m}} in cluster {{$labels.cl_name}} is {{printf "%%.0f" $value}} which is above the threshold of %(alertsHighTimeoutElections)s.' - ) % $._config, + 'The number of elections being called due to the primary node timing out in replica set {{$labels.rs_nm}} in cluster {{$labels.cl_name}} is {{printf "%%.0f" $value}} which is above the threshold of %(alertsHighTimeoutElections)s.' 
+ ) % this.config, }, }, ], diff --git a/mongodb-atlas-mixin/config.libsonnet b/mongodb-atlas-mixin/config.libsonnet index a7b92c965..8b6c87abc 100644 --- a/mongodb-atlas-mixin/config.libsonnet +++ b/mongodb-atlas-mixin/config.libsonnet @@ -1,18 +1,45 @@ { - _config+:: { - // sharding dashboard flag - enableShardingOverview: false, + local this = self, - dashboardTags: ['mongodb-atlas-mixin'], - dashboardPeriod: 'now-30m', - dashboardTimezone: 'default', - dashboardRefresh: '1m', + // Basic filtering - MongoDB Atlas uses job and cl_name (cluster name) as primary filters + filteringSelector: 'job="integrations/mongodb-atlas"', + groupLabels: ['job', 'cl_name'], + instanceLabels: ['instance'], - // alerts thresholds - alertsDeadlocks: 10, // count - alertsSlowNetworkRequests: 10, // count - alertsHighDiskUsage: 90, // percentage: 0-100 - alertsSlowHardwareIO: 3, // seconds - alertsHighTimeoutElections: 10, // count + // Dashboard settings + dashboardTags: ['mongodb-atlas-mixin'], + uid: 'mongodb-atlas', + dashboardNamePrefix: 'MongoDB Atlas', + dashboardRefresh: '1m', + dashboardPeriod: 'now-30m', + dashboardTimezone: 'default', + + // Sharding dashboard flag, enable this to generate the sharding overview dashboard + enableShardingOverview: true, + + // Logs configuration (MongoDB Atlas does not have Loki logs by default) + enableLokiLogs: false, // note for users, this is not supported by the MongoDB Atlas mixin as there shouldn't be any logs to monitor yet + logLabels: [], + extraLogLabels: [], + logsVolumeGroupBy: 'level', + showLogsVolume: false, + + // Alert thresholds with units + alertsDeadlocks: 10, // count + alertsSlowNetworkRequests: 10, // count + alertsHighDiskUsage: 90, // % + alertsSlowHardwareIO: 3, // seconds + alertsHighTimeoutElections: 10, // count + + // Metrics source + metricsSource: 'prometheus', + + // Import signal definitions (organized by dashboard) + signals+: { + cluster: (import './signals/cluster.libsonnet')(this), + elections: (import 
'./signals/elections.libsonnet')(this), + operations: (import './signals/operations.libsonnet')(this), + performance: (import './signals/performance.libsonnet')(this), + sharding: (import './signals/sharding.libsonnet')(this), }, } diff --git a/mongodb-atlas-mixin/dashboards.libsonnet b/mongodb-atlas-mixin/dashboards.libsonnet new file mode 100644 index 000000000..8ed8fc38f --- /dev/null +++ b/mongodb-atlas-mixin/dashboards.libsonnet @@ -0,0 +1,176 @@ +local g = import './g.libsonnet'; + +{ + local root = self, + new(this): + local prefix = this.config.dashboardNamePrefix; + local links = this.grafana.links; + local tags = this.config.dashboardTags; + local uid = this.config.uid; + local vars = this.grafana.variables; + local annotations = this.grafana.annotations; + local refresh = this.config.dashboardRefresh; + local period = this.config.dashboardPeriod; + local timezone = this.config.dashboardTimezone; + + local rsNmVariable = g.dashboard.variable.query.new('rs_nm') + + g.dashboard.variable.custom.selectionOptions.withMulti(true) + + g.dashboard.variable.custom.selectionOptions.withIncludeAll(true) + + g.dashboard.variable.query.queryTypes.withLabelValues(label='rs_nm', metric='mongodb_network_bytesIn') + + g.dashboard.variable.query.withDatasourceFromVariable(variable=vars.datasources.prometheus) + + g.dashboard.variable.query.refresh.onTime(); + + { + 'mongodb-atlas-cluster-overview.json': + g.dashboard.new(prefix + ' cluster overview') + + g.dashboard.withDescription('Overview of MongoDB Atlas cluster metrics.') + + g.dashboard.withPanels( + g.util.panel.resolveCollapsedFlagOnRows( + g.util.grid.wrapPanels( + [ + this.grafana.rows.clusterOverviewShardRow, + this.grafana.rows.clusterOverviewConfigRow, + this.grafana.rows.clusterOverviewMongosRow, + this.grafana.rows.clusterOverviewPerformanceRow, + this.grafana.rows.clusterOverviewOperationsRow, + this.grafana.rows.clusterOverviewLocksRow, + ] + ) + ) + ) + + root.applyCommon( + vars.multiInstance + [ + 
rsNmVariable, + ], + uid + '-cluster-overview', + tags, + links { clusterOverview+:: {} }, + annotations, + timezone, + refresh, + period + ), + + 'mongodb-atlas-elections-overview.json': + g.dashboard.new(prefix + ' elections overview') + + g.dashboard.withDescription('Overview of MongoDB Atlas election metrics.') + + g.dashboard.withPanels( + g.util.panel.resolveCollapsedFlagOnRows( + g.util.grid.wrapPanels( + [ + this.grafana.rows.electionsRow, + this.grafana.rows.electionsCatchUpsRow, + ] + ) + ) + ) + + root.applyCommon( + vars.multiInstance + [ + rsNmVariable, + ], + uid + '-elections-overview', + tags, + links { electionsOverview+:: {} }, + annotations, + timezone, + refresh, + period + ), + + 'mongodb-atlas-operations-overview.json': + g.dashboard.new(prefix + ' operations overview') + + g.dashboard.withDescription('Overview of MongoDB Atlas operation metrics.') + + g.dashboard.withPanels( + g.util.panel.resolveCollapsedFlagOnRows( + g.util.grid.wrapPanels( + [ + this.grafana.rows.operationsCountersInstanceRow, + this.grafana.rows.operationsConnectionsRow, + this.grafana.rows.operationsReadWriteRow, + this.grafana.rows.operationsLocksRow, + ] + ) + ) + ) + + root.applyCommon( + vars.multiInstance + [ + rsNmVariable, + ], + uid + '-operations-overview', + tags, + links { operationsOverview+:: {} }, + annotations, + timezone, + refresh, + period + ), + + 'mongodb-atlas-performance-overview.json': + g.dashboard.new(prefix + ' performance overview') + + g.dashboard.withDescription('Overview of MongoDB Atlas performance metrics.') + + g.dashboard.withPanels( + g.util.panel.resolveCollapsedFlagOnRows( + g.util.grid.wrapPanels( + [ + this.grafana.rows.performanceMemoryHardwareRow, + this.grafana.rows.performanceDiskRow, + this.grafana.rows.performanceNetworkRow, + this.grafana.rows.performanceHardwareIORow, + ] + ) + ) + ) + + root.applyCommon( + vars.multiInstance + [ + rsNmVariable, + ], + uid + '-performance-overview', + tags, + links { performanceOverview+:: 
{} }, + annotations, + timezone, + refresh, + period + ), + } + + + if this.config.enableShardingOverview then + { + 'mongodb-atlas-sharding-overview.json': + g.dashboard.new(prefix + ' sharding overview') + + g.dashboard.withDescription('Overview of MongoDB Atlas sharding metrics.') + + g.dashboard.withPanels( + g.util.panel.resolveCollapsedFlagOnRows( + g.util.grid.wrapPanels( + [ + this.grafana.rows.shardingGeneralStatsRow, + this.grafana.rows.shardingCatalogCacheRow, + this.grafana.rows.shardingOperationsRow, + ] + ) + ) + ) + + root.applyCommon( + vars.multiInstance + [ + rsNmVariable, + ], + uid + '-sharding-overview', + tags, + links { shardingOverview+:: {} }, + annotations, + timezone, + refresh, + period + ), + } else {}, + + applyCommon(vars, uid, tags, links, annotations, timezone, refresh, period): + g.dashboard.withTags(tags) + + g.dashboard.withUid(uid) + + g.dashboard.withLinks(std.objectValues(links)) + + g.dashboard.withTimezone(timezone) + + g.dashboard.withRefresh(refresh) + + g.dashboard.time.withFrom(period) + + g.dashboard.withVariables(vars) + + g.dashboard.withAnnotations(std.objectValues(annotations)), +} diff --git a/mongodb-atlas-mixin/dashboards/dashboards.libsonnet b/mongodb-atlas-mixin/dashboards/dashboards.libsonnet deleted file mode 100644 index e7028638a..000000000 --- a/mongodb-atlas-mixin/dashboards/dashboards.libsonnet +++ /dev/null @@ -1,5 +0,0 @@ -(import 'mongodb-atlas-cluster-overview.libsonnet') + -(import 'mongodb-atlas-operations-overview.libsonnet') + -(import 'mongodb-atlas-performance-overview.libsonnet') + -(import 'mongodb-atlas-elections-overview.libsonnet') + -(import 'mongodb-atlas-sharding-overview.libsonnet') diff --git a/mongodb-atlas-mixin/dashboards/mongodb-atlas-cluster-overview.libsonnet b/mongodb-atlas-mixin/dashboards/mongodb-atlas-cluster-overview.libsonnet deleted file mode 100644 index 5520d34cb..000000000 --- a/mongodb-atlas-mixin/dashboards/mongodb-atlas-cluster-overview.libsonnet +++ /dev/null @@ 
-1,2159 +0,0 @@ -local g = (import 'grafana-builder/grafana.libsonnet'); -local grafana = (import 'grafonnet/grafana.libsonnet'); -local dashboard = grafana.dashboard; -local template = grafana.template; -local prometheus = grafana.prometheus; - -local dashboardUid = 'mongodb-atlas-cluster-overview'; - -local promDatasourceName = 'prometheus_datasource'; - -local promDatasource = { - uid: '${%s}' % promDatasourceName, -}; - -local shardRow = { - datasource: promDatasource, - targets: [ - prometheus.target( - '', - datasource=promDatasource, - legendFormat='', - format='time_series', - ), - ], - type: 'row', - title: 'Shard', - collapsed: false, -}; - -local shardNodesPanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'mongodb_network_bytesIn{job=~"$job",cl_name=~"$cl_name"}', - datasource=promDatasource, - legendFormat='', - format='time_series', - ), - ], - type: 'table', - title: 'Shard nodes', - description: 'An inventory table for shard nodes in the environment.', - fieldConfig: { - defaults: { - color: { - mode: 'thresholds', - }, - custom: { - align: 'center', - cellOptions: { - type: 'auto', - }, - filterable: false, - inspect: false, - }, - mappings: [ - { - options: { - '1': { - index: 0, - text: 'Primary', - }, - '2': { - index: 1, - text: 'Secondary', - }, - }, - type: 'value', - }, - ], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - ], - }, - }, - overrides: [ - { - matcher: { - id: 'byName', - options: 'cl_role', - }, - properties: [ - { - id: 'custom.width', - value: 150, - }, - ], - }, - { - matcher: { - id: 'byName', - options: 'rs_state', - }, - properties: [ - { - id: 'custom.width', - value: 100, - }, - ], - }, - { - matcher: { - id: 'byName', - options: 'rs_nm', - }, - properties: [ - { - id: 'custom.width', - value: 250, - }, - ], - }, - { - matcher: { - id: 'byName', - options: 'cl_name', - }, - properties: [ - { - id: 'custom.width', - value: 300, - }, - ], - }, - { - 
matcher: { - id: 'byName', - options: 'group_id', - }, - properties: [ - { - id: 'custom.width', - value: 300, - }, - ], - }, - { - matcher: { - id: 'byName', - options: 'State', - }, - properties: [ - { - id: 'custom.cellOptions', - value: { - type: 'color-text', - }, - }, - { - id: 'mappings', - value: [ - { - options: { - '1': { - color: 'green', - index: 0, - text: 'Primary', - }, - '2': { - color: 'yellow', - index: 1, - text: 'Secondary', - }, - }, - type: 'value', - }, - ], - }, - ], - }, - ], - }, - options: { - cellHeight: 'md', - footer: { - countRows: false, - enablePagination: false, - fields: '', - reducer: [ - 'sum', - ], - show: false, - }, - showHeader: true, - }, - pluginVersion: '10.2.0-59981', - transformations: [ - { - id: 'reduce', - options: { - labelsToFields: true, - reducers: [ - 'lastNotNull', - ], - }, - }, - { - id: 'organize', - options: { - excludeByName: { - Field: true, - 'Last *': true, - __name__: true, - job: true, - org_id: true, - process_port: true, - }, - indexByName: { - Field: 6, - 'Last *': 11, - __name__: 7, - cl_name: 1, - cl_role: 2, - group_id: 0, - instance: 3, - job: 8, - org_id: 9, - process_port: 10, - rs_nm: 4, - rs_state: 5, - }, - renameByName: { - cl_name: 'Cluster', - cl_role: 'Role', - group_id: 'Group', - instance: 'Node', - rs_nm: 'Replica set', - rs_state: 'State', - }, - }, - }, - { - id: 'filterByValue', - options: { - filters: [ - { - config: { - id: 'equal', - options: { - value: 'shardsvr', - }, - }, - fieldName: 'Role', - }, - ], - match: 'all', - type: 'include', - }, - }, - ], -}; - -local configRow = { - datasource: promDatasource, - targets: [ - prometheus.target( - '', - datasource=promDatasource, - legendFormat='', - format='time_series', - ), - ], - type: 'row', - title: 'Config', - collapsed: false, -}; - -local configNodesPanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'mongodb_network_bytesIn{job=~"$job",cl_name=~"$cl_name"}', - datasource=promDatasource, - 
legendFormat='', - format='time_series', - ), - ], - type: 'table', - title: 'Config nodes', - description: 'An inventory table for config nodes in the environment.', - fieldConfig: { - defaults: { - color: { - mode: 'thresholds', - }, - custom: { - align: 'center', - cellOptions: { - type: 'auto', - }, - filterable: false, - inspect: false, - }, - mappings: [ - { - options: { - '1': { - index: 0, - text: 'Primary', - }, - '2': { - index: 1, - text: 'Secondary', - }, - }, - type: 'value', - }, - ], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - ], - }, - }, - overrides: [ - { - matcher: { - id: 'byName', - options: 'cl_role', - }, - properties: [ - { - id: 'custom.width', - value: 150, - }, - ], - }, - { - matcher: { - id: 'byName', - options: 'rs_state', - }, - properties: [ - { - id: 'custom.width', - value: 100, - }, - ], - }, - { - matcher: { - id: 'byName', - options: 'rs_nm', - }, - properties: [ - { - id: 'custom.width', - value: 250, - }, - ], - }, - { - matcher: { - id: 'byName', - options: 'cl_name', - }, - properties: [ - { - id: 'custom.width', - value: 300, - }, - ], - }, - { - matcher: { - id: 'byName', - options: 'group_id', - }, - properties: [ - { - id: 'custom.width', - value: 300, - }, - ], - }, - { - matcher: { - id: 'byName', - options: 'State', - }, - properties: [ - { - id: 'custom.cellOptions', - value: { - type: 'color-text', - }, - }, - { - id: 'mappings', - value: [ - { - options: { - '1': { - color: 'green', - index: 0, - text: 'Primary', - }, - '2': { - color: 'yellow', - index: 1, - text: 'Secondary', - }, - }, - type: 'value', - }, - ], - }, - ], - }, - ], - }, - options: { - cellHeight: 'md', - footer: { - countRows: false, - enablePagination: false, - fields: '', - reducer: [ - 'sum', - ], - show: false, - }, - showHeader: true, - }, - pluginVersion: '10.2.0-59981', - transformations: [ - { - id: 'reduce', - options: { - labelsToFields: true, - reducers: [ - 'lastNotNull', - ], - }, - }, - 
{ - id: 'organize', - options: { - excludeByName: { - Field: true, - 'Last *': true, - __name__: true, - job: true, - org_id: true, - process_port: true, - }, - indexByName: { - Field: 6, - 'Last *': 11, - __name__: 7, - cl_name: 1, - cl_role: 2, - group_id: 0, - instance: 3, - job: 8, - org_id: 9, - process_port: 10, - rs_nm: 4, - rs_state: 5, - }, - renameByName: { - cl_name: 'Cluster', - cl_role: 'Role', - group_id: 'Group', - instance: 'Node', - rs_nm: 'Replica set', - rs_state: 'State', - }, - }, - }, - { - id: 'filterByValue', - options: { - filters: [ - { - config: { - id: 'equal', - options: { - value: 'configsvr', - }, - }, - fieldName: 'Role', - }, - ], - match: 'all', - type: 'include', - }, - }, - ], -}; - -local mongosRow = { - datasource: promDatasource, - targets: [ - prometheus.target( - '', - datasource=promDatasource, - legendFormat='', - format='time_series', - ), - ], - type: 'row', - title: 'mongos', - collapsed: false, -}; - -local mongosNodesPanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'mongodb_network_bytesIn{job=~"$job",cl_name=~"$cl_name"}', - datasource=promDatasource, - legendFormat='', - format='time_series', - ), - ], - type: 'table', - title: 'mongos nodes', - description: 'An inventory table for mongos nodes in the environment.', - fieldConfig: { - defaults: { - color: { - mode: 'thresholds', - }, - custom: { - align: 'center', - cellOptions: { - type: 'auto', - }, - filterable: false, - inspect: false, - }, - mappings: [ - { - options: { - '1': { - index: 0, - text: 'Primary', - }, - '2': { - index: 1, - text: 'Secondary', - }, - }, - type: 'value', - }, - ], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - ], - }, - }, - overrides: [ - { - matcher: { - id: 'byName', - options: 'cl_role', - }, - properties: [ - { - id: 'custom.width', - value: 150, - }, - ], - }, - { - matcher: { - id: 'byName', - options: 'rs_state', - }, - properties: [ - { - id: 
'custom.width', - value: 100, - }, - ], - }, - { - matcher: { - id: 'byName', - options: 'rs_nm', - }, - properties: [ - { - id: 'custom.width', - value: 250, - }, - ], - }, - { - matcher: { - id: 'byName', - options: 'cl_name', - }, - properties: [ - { - id: 'custom.width', - value: 300, - }, - ], - }, - { - matcher: { - id: 'byName', - options: 'group_id', - }, - properties: [ - { - id: 'custom.width', - value: 300, - }, - ], - }, - ], - }, - options: { - cellHeight: 'md', - footer: { - countRows: false, - enablePagination: false, - fields: '', - reducer: [ - 'sum', - ], - show: false, - }, - showHeader: true, - }, - pluginVersion: '10.2.0-59981', - transformations: [ - { - id: 'reduce', - options: { - labelsToFields: true, - reducers: [ - 'lastNotNull', - ], - }, - }, - { - id: 'organize', - options: { - excludeByName: { - Field: true, - 'Last *': true, - __name__: true, - job: true, - org_id: true, - process_port: true, - rs_state: true, - }, - indexByName: { - Field: 6, - 'Last *': 11, - __name__: 7, - cl_name: 1, - cl_role: 2, - group_id: 0, - instance: 3, - job: 8, - org_id: 9, - process_port: 10, - rs_nm: 4, - rs_state: 5, - }, - renameByName: { - cl_name: 'Cluster', - cl_role: 'Role', - group_id: 'Group', - instance: 'Node', - rs_nm: 'Replica set', - }, - }, - }, - { - id: 'filterByValue', - options: { - filters: [ - { - config: { - id: 'equal', - options: { - value: 'mongos', - }, - }, - fieldName: 'Role', - }, - ], - match: 'all', - type: 'include', - }, - }, - ], -}; - -local performanceRow = { - datasource: promDatasource, - targets: [ - prometheus.target( - '', - datasource=promDatasource, - legendFormat='', - format='time_series', - ), - ], - type: 'row', - title: 'Performance', - collapsed: false, -}; - -local hardwareIOPanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'sum (rate(hardware_disk_metrics_read_count{job=~"$job",cl_name=~"$cl_name"}[$__rate_interval])) by (cl_name)', - datasource=promDatasource, - 
legendFormat='{{cl_name}} - reads', - format='time_series', - ), - prometheus.target( - 'sum (rate(hardware_disk_metrics_write_count{job=~"$job",cl_name=~"$cl_name"}[$__rate_interval])) by (cl_name)', - datasource=promDatasource, - legendFormat='{{cl_name}} - writes', - format='time_series', - ), - ], - type: 'timeseries', - title: 'Hardware I/O', - description: "The number of read and write I/O's processed.", - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 10, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - insertNulls: false, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'never', - spanNulls: false, - stacking: { - group: 'A', - mode: 'normal', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - }, - ], - }, - unit: 'iops', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'multi', - sort: 'desc', - }, - }, -}; - -local hardwareIOWaitTimePanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'sum (increase(hardware_disk_metrics_read_time_milliseconds{job=~"$job",cl_name=~"$cl_name"}[$__interval:])) by (cl_name)', - datasource=promDatasource, - legendFormat='{{cl_name}} - reads', - format='time_series', - interval='1m', - ), - prometheus.target( - 'sum (increase(hardware_disk_metrics_write_time_milliseconds{job=~"$job",cl_name=~"$cl_name"}[$__interval:])) by (cl_name)', - datasource=promDatasource, - legendFormat='{{cl_name}} - writes', - format='time_series', - interval='1m', - ), - ], - type: 'timeseries', - title: 'Hardware I/O wait time / 
$__interval', - description: 'The amount of time spent waiting for I/O requests.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 10, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - insertNulls: false, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'never', - spanNulls: false, - stacking: { - group: 'A', - mode: 'normal', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - }, - ], - }, - unit: 'ms', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'multi', - sort: 'desc', - }, - }, -}; - -local hardwareCPUInterruptServiceTimePanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'sum (increase(hardware_system_cpu_irq_milliseconds{job=~"$job",cl_name=~"$cl_name"}[$__interval:])) by (cl_name)', - datasource=promDatasource, - legendFormat='{{cl_name}}', - format='time_series', - interval='1m', - ), - ], - type: 'timeseries', - title: 'Hardware CPU interrupt service time / $__interval', - description: 'The amount of time spent servicing CPU interrupts.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 10, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - insertNulls: false, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'never', - spanNulls: false, - 
stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - }, - ], - }, - unit: 'ms', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'multi', - sort: 'desc', - }, - }, -}; - -local memoryUsedPanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'sum (mongodb_mem_resident{job=~"$job",cl_name=~"$cl_name"}) by (cl_name)', - datasource=promDatasource, - legendFormat='{{cl_name}} - RAM', - format='time_series', - ), - prometheus.target( - 'sum (mongodb_mem_virtual{job=~"$job",cl_name=~"$cl_name"}) by (cl_name)', - datasource=promDatasource, - legendFormat='{{cl_name}} - virtual', - format='time_series', - ), - ], - type: 'timeseries', - title: 'Memory used', - description: 'The amount of RAM and virtual memory being used.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 10, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - insertNulls: false, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'never', - spanNulls: false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - }, - ], - }, - unit: 'mbytes', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'multi', - sort: 'desc', - }, - }, -}; - -local diskSpaceUsagePanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - 
'(sum (hardware_disk_metrics_disk_space_used_bytes{job=~"$job",cl_name=~"$cl_name"}) by(cl_name)) / (clamp_min(sum (hardware_disk_metrics_disk_space_free_bytes{job=~"$job",cl_name=~"$cl_name"}) by(cl_name) + sum (hardware_disk_metrics_disk_space_used_bytes{job=~"$job",cl_name=~"$cl_name"}) by(cl_name),0.1))', - datasource=promDatasource, - legendFormat='{{cl_name}}', - format='time_series', - ), - ], - type: 'timeseries', - title: 'Disk space usage', - description: 'The percentage of hardware space used.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 10, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - insertNulls: false, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'never', - spanNulls: false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - max: 1, - min: 0, - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - }, - ], - }, - unit: 'percentunit', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'multi', - sort: 'desc', - }, - }, -}; - -local networkRequestsPanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'sum (rate(mongodb_network_numRequests{job=~"$job",cl_name=~"$cl_name"}[$__rate_interval])) by (cl_name)', - datasource=promDatasource, - legendFormat='{{cl_name}}', - format='time_series', - ), - ], - type: 'timeseries', - title: 'Network requests', - description: 'The number of distinct requests that the server has received.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - 
axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 10, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - insertNulls: false, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'never', - spanNulls: false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - }, - ], - }, - unit: 'reqps', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'multi', - sort: 'desc', - }, - }, -}; - -local networkThroughputPanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'sum (rate(mongodb_network_bytesIn{job=~"$job",cl_name=~"$cl_name"}[$__rate_interval])) by (cl_name)', - datasource=promDatasource, - legendFormat='{{cl_name}} - received', - format='time_series', - ), - prometheus.target( - 'sum (rate(mongodb_network_bytesOut{job=~"$job",cl_name=~"$cl_name"}[$__rate_interval])) by (cl_name)', - datasource=promDatasource, - legendFormat='{{cl_name}} - sent', - format='time_series', - ), - ], - type: 'timeseries', - title: 'Network throughput', - description: 'The number of bytes sent and received over network connections.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 10, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - insertNulls: false, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'never', - spanNulls: false, - stacking: { - group: 'A', - 
mode: 'normal', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - }, - ], - }, - unit: 'Bps', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'multi', - sort: 'desc', - }, - }, - transformations: [], -}; - -local slowRequestsPanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'sum (rate(mongodb_network_numSlowDNSOperations{job=~"$job",cl_name=~"$cl_name"}[$__rate_interval])) by (cl_name)', - datasource=promDatasource, - legendFormat='{{cl_name}} - DNS', - format='time_series', - ), - prometheus.target( - 'sum (rate(mongodb_network_numSlowSSLOperations{job=~"$job",cl_name=~"$cl_name"}[$__rate_interval])) by (cl_name)', - datasource=promDatasource, - legendFormat='{{cl_name}} - SSL', - format='time_series', - ), - ], - type: 'timeseries', - title: 'Slow requests', - description: 'The rate of DNS and SSL operations that took longer than 1 second.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 10, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - insertNulls: false, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'never', - spanNulls: false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - }, - ], - }, - unit: 'reqps', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'multi', - sort: 'desc', - }, - }, -}; - 
-local operationsRow = { - datasource: promDatasource, - targets: [ - prometheus.target( - '', - datasource=promDatasource, - legendFormat='', - format='time_series', - ), - ], - type: 'row', - title: 'Operations', - collapsed: false, -}; - -local connectionsPanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'sum (rate(mongodb_connections_totalCreated{job=~"$job",cl_name=~"$cl_name"}[$__rate_interval])) by (cl_name)', - datasource=promDatasource, - legendFormat='{{cl_name}}', - format='time_series', - ), - ], - type: 'timeseries', - title: 'Connections', - description: 'The rate of incoming connections to the cluster created.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 10, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - insertNulls: false, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'never', - spanNulls: false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - }, - ], - }, - unit: 'conns/s', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'multi', - sort: 'desc', - }, - }, -}; - -local readwriteOperationsPanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'sum (rate(mongodb_opLatencies_reads_ops{job=~"$job",cl_name=~"$cl_name"}[$__rate_interval])) by (cl_name)', - datasource=promDatasource, - legendFormat='{{cl_name}} - reads', - format='time_series', - ), - prometheus.target( - 'sum 
(rate(mongodb_opLatencies_writes_ops{job=~"$job",cl_name=~"$cl_name"}[$__rate_interval])) by (cl_name)', - datasource=promDatasource, - legendFormat='{{cl_name}} - writes', - format='time_series', - ), - ], - type: 'timeseries', - title: 'Read/Write operations', - description: 'The number of read and write operations.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 10, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - insertNulls: false, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'never', - spanNulls: false, - stacking: { - group: 'A', - mode: 'normal', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - }, - ], - }, - unit: 'ops', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'multi', - sort: 'desc', - }, - }, -}; - -local operationsPanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'sum (increase(mongodb_opcounters_insert{job=~"$job",cl_name=~"$cl_name"}[$__interval:])) by (cl_name)', - datasource=promDatasource, - legendFormat='{{cl_name}} - insert', - format='time_series', - interval='1m', - ), - prometheus.target( - 'sum (increase(mongodb_opcounters_query{job=~"$job",cl_name=~"$cl_name"}[$__interval:])) by (cl_name)', - datasource=promDatasource, - legendFormat='{{cl_name}} - query', - format='time_series', - interval='1m', - ), - prometheus.target( - 'sum (increase(mongodb_opcounters_update{job=~"$job",cl_name=~"$cl_name"}[$__interval:])) by (cl_name)', - datasource=promDatasource, - legendFormat='{{cl_name}} - update', - 
format='time_series', - interval='1m', - ), - prometheus.target( - 'sum (increase(mongodb_opcounters_delete{job=~"$job",cl_name=~"$cl_name"}[$__interval:])) by (cl_name)', - datasource=promDatasource, - legendFormat='{{cl_name}} - delete', - format='time_series', - interval='1m', - ), - ], - type: 'piechart', - title: 'Operations', - description: 'The number of insert, query, update, and delete operations.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - }, - mappings: [], - unit: 'none', - }, - overrides: [], - }, - options: { - displayLabels: [], - legend: { - displayMode: 'table', - placement: 'bottom', - showLegend: true, - values: [ - 'value', - ], - }, - pieType: 'pie', - reduceOptions: { - calcs: [ - 'lastNotNull', - ], - fields: '', - values: false, - }, - tooltip: { - mode: 'multi', - sort: 'desc', - }, - }, -}; - -local readwriteLatencyPanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'sum (increase(mongodb_opLatencies_reads_latency{job=~"$job",cl_name=~"$cl_name"}[$__interval:])) by (cl_name)', - datasource=promDatasource, - legendFormat='{{cl_name}} - reads', - format='time_series', - interval='1m', - ), - prometheus.target( - 'sum (increase(mongodb_opLatencies_writes_latency{job=~"$job",cl_name=~"$cl_name"}[$__interval:])) by (cl_name)', - datasource=promDatasource, - legendFormat='{{cl_name}} - writes', - format='time_series', - interval='1m', - ), - ], - type: 'timeseries', - title: 'Read/Write latency / $__interval', - description: 'The latency for read and write operations.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 10, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - insertNulls: 
false, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'never', - spanNulls: false, - stacking: { - group: 'A', - mode: 'normal', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - }, - ], - }, - unit: 'µs', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'multi', - sort: 'desc', - }, - }, -}; - -local locksRow = { - datasource: promDatasource, - targets: [ - prometheus.target( - '', - datasource=promDatasource, - legendFormat='', - format='time_series', - ), - ], - type: 'row', - title: 'Locks', - collapsed: false, -}; - -local currentQueuePanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'sum (mongodb_globalLock_currentQueue_readers{job=~"$job",cl_name=~"$cl_name"}) by (cl_name)', - datasource=promDatasource, - legendFormat='{{cl_name}} - reads', - format='time_series', - ), - prometheus.target( - 'sum (mongodb_globalLock_currentQueue_writers{job=~"$job",cl_name=~"$cl_name"}) by (cl_name)', - datasource=promDatasource, - legendFormat='{{cl_name}} - writes', - format='time_series', - ), - ], - type: 'timeseries', - title: 'Current queue', - description: 'The number of reads and writes queued because of a lock.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 10, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - insertNulls: false, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'never', - spanNulls: false, - stacking: { - group: 'A', - mode: 'normal', - }, - 
thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - }, - ], - }, - unit: 'none', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'multi', - sort: 'desc', - }, - }, -}; - -local activeClientOperationsPanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'sum (mongodb_globalLock_activeClients_readers{job=~"$job",cl_name=~"$cl_name"}) by (cl_name)', - datasource=promDatasource, - legendFormat='{{cl_name}} - reads', - format='time_series', - ), - prometheus.target( - 'sum (mongodb_globalLock_activeClients_writers{job=~"$job",cl_name=~"$cl_name"}) by (cl_name)', - datasource=promDatasource, - legendFormat='{{cl_name}} - writes', - format='time_series', - ), - ], - type: 'timeseries', - title: 'Active client operations', - description: 'The number of reads and writes being actively performed by connected clients.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 10, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - insertNulls: false, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'never', - spanNulls: false, - stacking: { - group: 'A', - mode: 'normal', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - }, - ], - }, - unit: 'none', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'multi', - sort: 'desc', - }, - }, -}; - -local databaseDeadlocksPanel = { - datasource: 
promDatasource, - targets: [ - prometheus.target( - 'sum (increase(mongodb_locks_Database_deadlockCount_W{job=~"$job",cl_name=~"$cl_name"}[$__interval:])) by (cl_name)', - datasource=promDatasource, - legendFormat='{{cl_name}} - exclusive', - format='time_series', - interval='1m', - ), - prometheus.target( - 'sum (increase(mongodb_locks_Database_deadlockCount_w{job=~"$job",cl_name=~"$cl_name"}[$__interval:])) by (cl_name)', - datasource=promDatasource, - legendFormat='{{cl_name}} - intent exclusive', - format='time_series', - interval='1m', - ), - prometheus.target( - 'sum (increase(mongodb_locks_Database_deadlockCount_R{job=~"$job",cl_name=~"$cl_name"}[$__interval:])) by (cl_name)', - datasource=promDatasource, - legendFormat='{{cl_name}} - shared', - format='time_series', - interval='1m', - ), - prometheus.target( - 'sum (increase(mongodb_locks_Database_deadlockCount_r{job=~"$job",cl_name=~"$cl_name"}[$__interval:])) by (cl_name)', - datasource=promDatasource, - legendFormat='{{cl_name}} - intent shared', - format='time_series', - interval='1m', - ), - ], - type: 'timeseries', - title: 'Database deadlocks / $__interval', - description: 'The number of deadlocks for database level locks.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 10, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - insertNulls: false, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'never', - spanNulls: false, - stacking: { - group: 'A', - mode: 'normal', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - }, - ], - }, - unit: 'none', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - 
displayMode: 'list', - placement: 'right', - showLegend: true, - }, - tooltip: { - mode: 'multi', - sort: 'desc', - }, - }, -}; - -local databaseWaitsAcquiringLockPanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'sum (increase(mongodb_locks_Database_acquireWaitCount_W{job=~"$job",cl_name=~"$cl_name"}[$__interval:])) by (cl_name)', - datasource=promDatasource, - legendFormat='{{cl_name}} - exclusive', - format='time_series', - interval='1m', - ), - prometheus.target( - 'sum (increase(mongodb_locks_Database_acquireWaitCount_w{job=~"$job",cl_name=~"$cl_name"}[$__interval:])) by (cl_name)', - datasource=promDatasource, - legendFormat='{{cl_name}} - intent exclusive', - format='time_series', - interval='1m', - ), - prometheus.target( - 'sum (increase(mongodb_locks_Database_acquireWaitCount_R{job=~"$job",cl_name=~"$cl_name"}[$__interval:])) by (cl_name)', - datasource=promDatasource, - legendFormat='{{cl_name}} - shared', - format='time_series', - interval='1m', - ), - prometheus.target( - 'sum (increase(mongodb_locks_Database_acquireWaitCount_r{job=~"$job",cl_name=~"$cl_name"}[$__interval:])) by (cl_name)', - datasource=promDatasource, - legendFormat='{{cl_name}} - intent shared', - format='time_series', - interval='1m', - ), - ], - type: 'timeseries', - title: 'Database waits acquiring lock / $__interval', - description: 'The number of times lock acquisitions encounter waits for database level locks.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 10, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - insertNulls: false, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'never', - spanNulls: false, - stacking: { - group: 'A', - mode: 'normal', 
- }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - }, - ], - }, - unit: 'none', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'right', - showLegend: true, - }, - tooltip: { - mode: 'multi', - sort: 'desc', - }, - }, -}; - -{ - grafanaDashboards+:: { - 'mongodb-atlas-cluster-overview.json': - dashboard.new( - 'MongoDB Atlas cluster overview', - time_from='%s' % $._config.dashboardPeriod, - tags=($._config.dashboardTags), - timezone='%s' % $._config.dashboardTimezone, - refresh='%s' % $._config.dashboardRefresh, - description='', - uid=dashboardUid, - ) - - .addTemplates( - [ - template.datasource( - promDatasourceName, - 'prometheus', - null, - label='Data Source', - refresh='load' - ), - template.new( - 'job', - promDatasource, - 'label_values(mongodb_network_bytesIn,job)', - label='Job', - refresh=2, - multi=true, - sort=0 - ), - template.new( - 'cl_name', - promDatasource, - 'label_values(mongodb_network_bytesIn{job=~"$job"},cl_name)', - label='Atlas cluster', - refresh=2, - includeAll=true, - multi=true, - allValues='.+', - sort=0 - ), - ] - ) - .addLink(grafana.link.dashboards( - asDropdown=false, - title='MongoDB Atlas dashboards', - includeVars=true, - keepTime=true, - tags=($._config.dashboardTags), - )) - .addPanels( - [ - shardRow { gridPos: { h: 1, w: 24, x: 0, y: 0 } }, - shardNodesPanel { gridPos: { h: 6, w: 24, x: 0, y: 1 } }, - configRow { gridPos: { h: 1, w: 24, x: 0, y: 7 } }, - configNodesPanel { gridPos: { h: 6, w: 24, x: 0, y: 8 } }, - mongosRow { gridPos: { h: 1, w: 24, x: 0, y: 14 } }, - mongosNodesPanel { gridPos: { h: 6, w: 24, x: 0, y: 15 } }, - performanceRow { gridPos: { h: 1, w: 24, x: 0, y: 21 } }, - hardwareIOPanel { gridPos: { h: 6, w: 6, x: 0, y: 22 } }, - hardwareIOWaitTimePanel { gridPos: { h: 6, w: 6, x: 6, y: 22 } }, - hardwareCPUInterruptServiceTimePanel { gridPos: { h: 6, w: 6, x: 12, y: 
22 } }, - memoryUsedPanel { gridPos: { h: 6, w: 6, x: 18, y: 22 } }, - diskSpaceUsagePanel { gridPos: { h: 6, w: 6, x: 0, y: 28 } }, - networkRequestsPanel { gridPos: { h: 6, w: 6, x: 6, y: 28 } }, - networkThroughputPanel { gridPos: { h: 6, w: 6, x: 12, y: 28 } }, - slowRequestsPanel { gridPos: { h: 6, w: 6, x: 18, y: 28 } }, - operationsRow { gridPos: { h: 1, w: 24, x: 0, y: 34 } }, - connectionsPanel { gridPos: { h: 6, w: 12, x: 0, y: 35 } }, - readwriteOperationsPanel { gridPos: { h: 12, w: 6, x: 12, y: 35 } }, - operationsPanel { gridPos: { h: 12, w: 6, x: 18, y: 35 } }, - readwriteLatencyPanel { gridPos: { h: 6, w: 12, x: 0, y: 41 } }, - locksRow { gridPos: { h: 1, w: 24, x: 0, y: 47 } }, - currentQueuePanel { gridPos: { h: 6, w: 12, x: 0, y: 48 } }, - activeClientOperationsPanel { gridPos: { h: 6, w: 12, x: 12, y: 48 } }, - databaseDeadlocksPanel { gridPos: { h: 6, w: 12, x: 0, y: 54 } }, - databaseWaitsAcquiringLockPanel { gridPos: { h: 6, w: 12, x: 12, y: 54 } }, - ] - ), - }, -} diff --git a/mongodb-atlas-mixin/dashboards/mongodb-atlas-elections-overview.libsonnet b/mongodb-atlas-mixin/dashboards/mongodb-atlas-elections-overview.libsonnet deleted file mode 100644 index eb26cc605..000000000 --- a/mongodb-atlas-mixin/dashboards/mongodb-atlas-elections-overview.libsonnet +++ /dev/null @@ -1,920 +0,0 @@ -local g = (import 'grafana-builder/grafana.libsonnet'); -local grafana = (import 'grafonnet/grafana.libsonnet'); -local dashboard = grafana.dashboard; -local template = grafana.template; -local prometheus = grafana.prometheus; - -local dashboardUid = 'mongodb-atlas-elections-overview'; - -local promDatasourceName = 'prometheus_datasource'; - -local promDatasource = { - uid: '${%s}' % promDatasourceName, -}; - -local stepupElectionsPanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'increase(mongodb_electionMetrics_stepUpCmd_called{job=~"$job",cl_name=~"$cl_name",rs_nm=~"$rs",instance=~"$instance"}[$__interval:])', - 
datasource=promDatasource, - legendFormat='{{instance}} - called', - format='time_series', - interval='1m', - ), - prometheus.target( - 'increase(mongodb_electionMetrics_stepUpCmd_successful{job=~"$job",cl_name=~"$cl_name",rs_nm=~"$rs",instance=~"$instance"}[$__interval:])', - datasource=promDatasource, - legendFormat='{{instance}} - successful', - format='time_series', - interval='1m', - ), - ], - type: 'timeseries', - title: 'Step-up elections / $__interval', - description: 'The number of elections called and elections won by the node when the primary stepped down.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 10, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - insertNulls: false, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'never', - spanNulls: false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - ], - }, - unit: 'none', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'multi', - sort: 'desc', - }, - }, -}; - -local priorityElectionsPanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'increase(mongodb_electionMetrics_priorityTakeover_called{job=~"$job",cl_name=~"$cl_name",rs_nm=~"$rs",instance=~"$instance"}[$__interval:])', - datasource=promDatasource, - legendFormat='{{instance}} - called', - format='time_series', - interval='1m', - ), - prometheus.target( - 
'increase(mongodb_electionMetrics_priorityTakeover_successful{job=~"$job",cl_name=~"$cl_name",rs_nm=~"$rs",instance=~"$instance"}[$__interval:])', - datasource=promDatasource, - legendFormat='{{instance}} - successful', - format='time_series', - interval='1m', - ), - ], - type: 'timeseries', - title: 'Priority elections / $__interval', - description: 'The number of elections called and elections won by the node when it had a higher priority than the primary node.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 10, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - insertNulls: false, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'never', - spanNulls: false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - ], - }, - unit: 'none', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'multi', - sort: 'desc', - }, - }, -}; - -local takeoverElectionsPanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'increase(mongodb_electionMetrics_catchUpTakeover_called{job=~"$job",cl_name=~"$cl_name",rs_nm=~"$rs",instance=~"$instance"}[$__interval:])', - datasource=promDatasource, - legendFormat='{{instance}} - called', - format='time_series', - interval='1m', - ), - prometheus.target( - 'increase(mongodb_electionMetrics_catchUpTakeover_successful{job=~"$job",cl_name=~"$cl_name",rs_nm=~"$rs",instance=~"$instance"}[$__interval:])', - datasource=promDatasource, - legendFormat='{{instance}} - successful', - 
format='time_series', - interval='1m', - ), - ], - type: 'timeseries', - title: 'Takeover elections / $__interval', - description: 'The number of elections called and elections won by the node when it was more current than the primary node.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 10, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - insertNulls: false, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'never', - spanNulls: false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - ], - }, - unit: 'none', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'multi', - sort: 'desc', - }, - }, -}; - -local timeoutElectionsPanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'increase(mongodb_electionMetrics_electionTimeout_called{job=~"$job",cl_name=~"$cl_name",rs_nm=~"$rs",instance=~"$instance"}[$__interval:])', - datasource=promDatasource, - legendFormat='{{instance}} - called', - format='time_series', - interval='1m', - ), - prometheus.target( - 'increase(mongodb_electionMetrics_electionTimeout_successful{job=~"$job",cl_name=~"$cl_name",rs_nm=~"$rs",instance=~"$instance"}[$__interval:])', - datasource=promDatasource, - legendFormat='{{instance}} - successful', - format='time_series', - interval='1m', - ), - ], - type: 'timeseries', - title: 'Timeout elections / $__interval', - description: 'The number of elections called and elections won by the node when the time it took to reach the 
primary node exceeded the election timeout limit.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 10, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - insertNulls: false, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'never', - spanNulls: false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - ], - }, - unit: 'none', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'multi', - sort: 'desc', - }, - }, -}; - -local catchupsRow = { - datasource: promDatasource, - targets: [ - prometheus.target( - '', - datasource=promDatasource, - legendFormat='', - ), - ], - type: 'row', - title: 'Catch-ups', -}; - -local catchupsPanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'increase(mongodb_electionMetrics_numCatchUps{job=~"$job",cl_name=~"$cl_name",rs_nm=~"$rs",instance=~"$instance"}[$__interval:])', - datasource=promDatasource, - legendFormat='{{instance}}', - format='time_series', - interval='1m', - ), - ], - type: 'timeseries', - title: 'Catch-ups / $__interval', - description: 'The number of times the node had to catch up to the highest known oplog entry.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 10, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - 
insertNulls: false, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'never', - spanNulls: false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - ], - }, - unit: 'none', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'multi', - sort: 'desc', - }, - }, -}; - -local catchupsSkippedPanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'increase(mongodb_electionMetrics_numCatchUpsSkipped{job=~"$job",cl_name=~"$cl_name",rs_nm=~"$rs",instance=~"$instance"}[$__interval:])', - datasource=promDatasource, - legendFormat='{{instance}}', - format='time_series', - interval='1m', - ), - ], - type: 'timeseries', - title: 'Catch-ups skipped / $__interval', - description: 'The number of times the node skipped the catch up process when it was the newly elected primary.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 10, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - insertNulls: false, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'never', - spanNulls: false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - ], - }, - unit: 'none', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - 
tooltip: { - mode: 'multi', - sort: 'desc', - }, - }, -}; - -local catchupsSucceededPanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'increase(mongodb_electionMetrics_numCatchUpsSucceeded{job=~"$job",cl_name=~"$cl_name",rs_nm=~"$rs",instance=~"$instance"}[$__interval:])', - datasource=promDatasource, - legendFormat='{{instance}}', - format='time_series', - interval='1m', - ), - ], - type: 'timeseries', - title: 'Catch-ups succeeded / $__interval', - description: 'The number of times the node succeeded in catching up when it was the newly elected primary.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 10, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - insertNulls: false, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'never', - spanNulls: false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - }, - ], - }, - unit: 'none', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'multi', - sort: 'desc', - }, - }, -}; - -local catchupsFailedPanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'increase(mongodb_electionMetrics_numCatchUpsFailedWithError{job=~"$job",cl_name=~"$cl_name",rs_nm=~"$rs",instance=~"$instance"}[$__interval:])', - datasource=promDatasource, - legendFormat='{{instance}}', - format='time_series', - interval='1m', - ), - ], - type: 'timeseries', - title: 'Catch-ups failed / $__interval', - description: 'The number of times the node failed in catching up when it 
was the newly elected primary.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 10, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - insertNulls: false, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'never', - spanNulls: false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - }, - ], - }, - unit: 'none', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'multi', - sort: 'desc', - }, - }, -}; - -local catchupTimeoutsPanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'increase(mongodb_electionMetrics_numCatchUpsTimedOut{job=~"$job",cl_name=~"$cl_name",rs_nm=~"$rs",instance=~"$instance"}[$__interval:])', - datasource=promDatasource, - legendFormat='{{instance}}', - format='time_series', - interval='1m', - ), - ], - type: 'timeseries', - title: 'Catch-up timeouts / $__interval', - description: 'The number of times the node timed out during the catch-up process when it was the newly elected primary.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 10, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - insertNulls: false, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'never', - spanNulls: false, - 
stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - }, - ], - }, - unit: 'none', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'multi', - sort: 'desc', - }, - }, -}; - -local averageCatchupOperationsPanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'mongodb_electionMetrics_averageCatchUpOps{job=~"$job",cl_name=~"$cl_name",rs_nm=~"$rs",instance=~"$instance"}', - datasource=promDatasource, - legendFormat='{{instance}}', - format='time_series', - ), - ], - type: 'timeseries', - title: 'Average catch-up operations', - description: 'The average number of operations done during the catch-up process when this node is the newly elected primary.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 10, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - insertNulls: false, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'never', - spanNulls: false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - }, - ], - }, - unit: 'none', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'multi', - sort: 'desc', - }, - }, -}; - -{ - grafanaDashboards+:: { - 'mongodb-atlas-elections-overview.json': - dashboard.new( - 'MongoDB Atlas election overview', - time_from='%s' % $._config.dashboardPeriod, - 
tags=($._config.dashboardTags), - timezone='%s' % $._config.dashboardTimezone, - refresh='%s' % $._config.dashboardRefresh, - description='', - uid=dashboardUid, - ) - - .addTemplates( - [ - template.datasource( - promDatasourceName, - 'prometheus', - null, - label='Data Source', - refresh='load' - ), - template.new( - 'job', - promDatasource, - 'label_values(mongodb_network_bytesIn,job)', - label='Job', - refresh=2, - multi=true, - sort=0 - ), - template.new( - 'cl_name', - promDatasource, - 'label_values(mongodb_network_bytesIn{job=~"$job"},cl_name)', - label='Atlas cluster', - refresh=2, - includeAll=true, - multi=true, - allValues='.+', - sort=0 - ), - template.new( - 'rs', - promDatasource, - 'label_values(mongodb_network_bytesIn{cl_name=~"$cl_name"},rs_nm)', - label='Replica set', - refresh=2, - includeAll=true, - multi=true, - allValues='.+', - sort=0 - ), - template.new( - 'instance', - promDatasource, - 'label_values(mongodb_network_bytesIn{rs_nm=~"$rs"},instance)', - label='Node', - refresh=2, - includeAll=true, - multi=true, - allValues='.+', - sort=0 - ), - ] - ) - .addLink(grafana.link.dashboards( - asDropdown=false, - title='MongoDB Atlas dashboards', - includeVars=true, - keepTime=true, - tags=($._config.dashboardTags), - )) - .addPanels( - [ - stepupElectionsPanel { gridPos: { h: 8, w: 12, x: 0, y: 0 } }, - priorityElectionsPanel { gridPos: { h: 8, w: 12, x: 12, y: 0 } }, - takeoverElectionsPanel { gridPos: { h: 8, w: 12, x: 0, y: 8 } }, - timeoutElectionsPanel { gridPos: { h: 8, w: 12, x: 12, y: 8 } }, - catchupsRow { gridPos: { h: 1, w: 24, x: 0, y: 16 } }, - catchupsPanel { gridPos: { h: 8, w: 12, x: 0, y: 17 } }, - catchupsSkippedPanel { gridPos: { h: 8, w: 12, x: 12, y: 17 } }, - catchupsSucceededPanel { gridPos: { h: 8, w: 12, x: 0, y: 25 } }, - catchupsFailedPanel { gridPos: { h: 8, w: 12, x: 12, y: 25 } }, - catchupTimeoutsPanel { gridPos: { h: 8, w: 12, x: 0, y: 33 } }, - averageCatchupOperationsPanel { gridPos: { h: 8, w: 12, x: 12, y: 33 
} }, - ] - ), - }, -} diff --git a/mongodb-atlas-mixin/dashboards/mongodb-atlas-operations-overview.libsonnet b/mongodb-atlas-mixin/dashboards/mongodb-atlas-operations-overview.libsonnet deleted file mode 100644 index 617e3a6e5..000000000 --- a/mongodb-atlas-mixin/dashboards/mongodb-atlas-operations-overview.libsonnet +++ /dev/null @@ -1,1338 +0,0 @@ -local g = (import 'grafana-builder/grafana.libsonnet'); -local grafana = (import 'grafonnet/grafana.libsonnet'); -local dashboard = grafana.dashboard; -local template = grafana.template; -local prometheus = grafana.prometheus; - -local dashboardUid = 'mongodb-atlas-operations-overview'; - -local promDatasourceName = 'prometheus_datasource'; - -local promDatasource = { - uid: '${%s}' % promDatasourceName, -}; - -local queryOperationsPanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'rate(mongodb_opcounters_query{job=~"$job",cl_name=~"$cl_name",rs_nm=~"$rs",instance=~"$instance"}[$__rate_interval])', - datasource=promDatasource, - legendFormat='{{instance}}', - format='time_series', - ), - ], - type: 'timeseries', - title: 'Query operations', - description: 'The rate of query operations the node has received.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 10, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - insertNulls: false, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'never', - spanNulls: false, - stacking: { - group: 'A', - mode: 'normal', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - ], - }, - unit: 'ops', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - 
displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'multi', - sort: 'desc', - }, - }, -}; - -local insertOperationsPanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'rate(mongodb_opcounters_insert{job=~"$job",cl_name=~"$cl_name",rs_nm=~"$rs",instance=~"$instance"}[$__rate_interval])', - datasource=promDatasource, - legendFormat='{{instance}}', - format='time_series', - ), - ], - type: 'timeseries', - title: 'Insert operations', - description: 'The rate of insert operations the node has received.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 10, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - insertNulls: false, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'never', - spanNulls: false, - stacking: { - group: 'A', - mode: 'normal', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - ], - }, - unit: 'ops', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'multi', - sort: 'desc', - }, - }, -}; - -local updateOperationsPanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'rate(mongodb_opcounters_update{job=~"$job",cl_name=~"$cl_name",rs_nm=~"$rs",instance=~"$instance"}[$__rate_interval])', - datasource=promDatasource, - legendFormat='{{instance}}', - format='time_series', - ), - ], - type: 'timeseries', - title: 'Update operations', - description: 'The rate of update operations this node has received.', - fieldConfig: { - defaults: { - color: { - mode: 
'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 10, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - insertNulls: false, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'never', - spanNulls: false, - stacking: { - group: 'A', - mode: 'normal', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - ], - }, - unit: 'ops', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'multi', - sort: 'desc', - }, - }, -}; - -local deleteOperationsPanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'rate(mongodb_opcounters_delete{job=~"$job",cl_name=~"$cl_name",rs_nm=~"$rs",instance=~"$instance"}[$__rate_interval])', - datasource=promDatasource, - legendFormat='{{instance}}', - format='time_series', - ), - ], - type: 'timeseries', - title: 'Delete operations', - description: 'The rate of delete operations this node has received.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 10, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - insertNulls: false, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'never', - spanNulls: false, - stacking: { - group: 'A', - mode: 'normal', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 
'green', - value: null, - }, - ], - }, - unit: 'ops', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'multi', - sort: 'desc', - }, - }, -}; - -local currentConnectionsPanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'mongodb_connections_current{job=~"$job",cl_name=~"$cl_name",rs_nm=~"$rs",instance=~"$instance"}', - datasource=promDatasource, - legendFormat='{{instance}}', - format='time_series', - ), - ], - type: 'timeseries', - title: 'Current connections', - description: 'The number of incoming connections from clients to the node.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 10, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - insertNulls: false, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'never', - spanNulls: false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - ], - }, - unit: 'none', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'multi', - sort: 'desc', - }, - }, -}; - -local activeConnectionsPanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'mongodb_connections_active{job=~"$job",cl_name=~"$cl_name",rs_nm=~"$rs",instance=~"$instance"}', - datasource=promDatasource, - legendFormat='{{instance}}', - format='time_series', - ), - ], - type: 'timeseries', - title: 'Active connections', - description: 'The number of connections that 
currently have operations in progress.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 10, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - insertNulls: false, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'never', - spanNulls: false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - ], - }, - unit: 'none', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'multi', - sort: 'desc', - }, - }, -}; - -local readAndWriteOperationsPanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'rate(mongodb_opLatencies_reads_ops{job=~"$job",cl_name=~"$cl_name",rs_nm=~"$rs",instance=~"$instance"}[$__rate_interval])', - datasource=promDatasource, - legendFormat='{{instance}} - reads', - format='time_series', - ), - prometheus.target( - 'rate(mongodb_opLatencies_writes_ops{job=~"$job",cl_name=~"$cl_name",rs_nm=~"$rs",instance=~"$instance"}[$__rate_interval])', - datasource=promDatasource, - legendFormat='{{instance}} - writes', - format='time_series', - ), - ], - type: 'timeseries', - title: 'Read and write operations', - description: 'The rate of read and write operations performed by the node.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 10, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, 
- viz: false, - }, - insertNulls: false, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'never', - spanNulls: false, - stacking: { - group: 'A', - mode: 'normal', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - }, - ], - }, - unit: 'ops', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'multi', - sort: 'desc', - }, - }, -}; - -local readAndWriteLatencyPanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'increase(mongodb_opLatencies_reads_latency{job=~"$job",cl_name=~"$cl_name",rs_nm=~"$rs",instance=~"$instance"}[$__interval:])', - datasource=promDatasource, - legendFormat='{{instance}} - reads', - format='time_series', - interval='1m', - ), - prometheus.target( - 'increase(mongodb_opLatencies_writes_latency{job=~"$job",cl_name=~"$cl_name",rs_nm=~"$rs",instance=~"$instance"}[$__interval:])', - datasource=promDatasource, - legendFormat='{{instance}} - writes', - format='time_series', - interval='1m', - ), - ], - type: 'timeseries', - title: 'Read and write latency / $__interval', - description: 'The latency time for read and write operations performed by this node.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 10, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - insertNulls: false, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'never', - spanNulls: false, - stacking: { - group: 'A', - mode: 'normal', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], 
- thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - }, - ], - }, - unit: 'µs', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'multi', - sort: 'desc', - }, - }, -}; - -local locksRow = { - datasource: promDatasource, - targets: [ - prometheus.target( - '', - datasource=promDatasource, - legendFormat='', - format='time_series', - ), - ], - type: 'row', - title: 'Locks', -}; - -local databaseDeadlocksPanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'increase(mongodb_locks_Database_deadlockCount_W{job=~"$job",cl_name=~"$cl_name",rs_nm=~"$rs",instance=~"$instance"}[$__interval:])', - datasource=promDatasource, - legendFormat='{{instance}} - exclusive', - format='time_series', - interval='1m', - ), - prometheus.target( - 'increase(mongodb_locks_Database_deadlockCount_w{job=~"$job",cl_name=~"$cl_name",rs_nm=~"$rs",instance=~"$instance"}[$__interval:])', - datasource=promDatasource, - legendFormat='{{instance}} - intent exclusive', - format='time_series', - interval='1m', - ), - prometheus.target( - 'increase(mongodb_locks_Database_deadlockCount_R{job=~"$job",cl_name=~"$cl_name",rs_nm=~"$rs",instance=~"$instance"}[$__interval:])', - datasource=promDatasource, - legendFormat='{{instance}} - shared', - format='time_series', - interval='1m', - ), - prometheus.target( - 'increase(mongodb_locks_Database_deadlockCount_r{job=~"$job",cl_name=~"$cl_name",rs_nm=~"$rs",instance=~"$instance"}[$__interval:])', - datasource=promDatasource, - legendFormat='{{instance}} - intent shared', - format='time_series', - interval='1m', - ), - ], - type: 'timeseries', - title: 'Database deadlocks / $__interval', - description: 'The number of deadlocks that have occurred for the database lock.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: 
'', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 10, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - insertNulls: false, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'never', - spanNulls: false, - stacking: { - group: 'A', - mode: 'normal', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - }, - ], - }, - unit: 'none', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'multi', - sort: 'desc', - }, - }, -}; - -local databaseWaitCountPanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'increase(mongodb_locks_Database_acquireWaitCount_W{job=~"$job",cl_name=~"$cl_name",rs_nm=~"$rs",instance=~"$instance"}[$__interval:])', - datasource=promDatasource, - legendFormat='{{instance}} - exclusive', - format='time_series', - interval='1m', - ), - prometheus.target( - 'increase(mongodb_locks_Database_acquireWaitCount_w{job=~"$job",cl_name=~"$cl_name",rs_nm=~"$rs",instance=~"$instance"}[$__interval:])', - datasource=promDatasource, - legendFormat='{{instance}} - intent exclusive', - format='time_series', - interval='1m', - ), - prometheus.target( - 'increase(mongodb_locks_Database_acquireWaitCount_R{job=~"$job",cl_name=~"$cl_name",rs_nm=~"$rs",instance=~"$instance"}[$__interval:])', - datasource=promDatasource, - legendFormat='{{instance}} - shared', - format='time_series', - interval='1m', - ), - prometheus.target( - 'increase(mongodb_locks_Database_acquireWaitCount_r{job=~"$job",cl_name=~"$cl_name",rs_nm=~"$rs",instance=~"$instance"}[$__interval:])', - datasource=promDatasource, - legendFormat='{{instance}} - intent shared', - format='time_series', - interval='1m', - ), - ], - type: 'timeseries', - title: 
'Database wait count / $__interval', - description: 'The number of database lock acquisitions that had to wait.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 10, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - insertNulls: false, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'never', - spanNulls: false, - stacking: { - group: 'A', - mode: 'normal', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - }, - ], - }, - unit: 'none', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'multi', - sort: 'desc', - }, - }, -}; - -local databaseWaitTimePanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'increase(mongodb_locks_Database_timeAcquiringMicros_W{job=~"$job",cl_name=~"$cl_name",rs_nm=~"$rs",instance=~"$instance"}[$__interval:])', - datasource=promDatasource, - legendFormat='{{instance}} - exclusive', - format='time_series', - interval='1m', - ), - prometheus.target( - 'increase(mongodb_locks_Database_timeAcquiringMicros_w{job=~"$job",cl_name=~"$cl_name",rs_nm=~"$rs",instance=~"$instance"}[$__interval:])', - datasource=promDatasource, - legendFormat='{{instance}} - intent exclusive', - format='time_series', - interval='1m', - ), - prometheus.target( - 'increase(mongodb_locks_Database_timeAcquiringMicros_R{job=~"$job",cl_name=~"$cl_name",rs_nm=~"$rs",instance=~"$instance"}[$__interval:])', - datasource=promDatasource, - legendFormat='{{instance}} - shared', - format='time_series', - interval='1m', - ), - prometheus.target( - 
'increase(mongodb_locks_Database_timeAcquiringMicros_r{job=~"$job",cl_name=~"$cl_name",rs_nm=~"$rs",instance=~"$instance"}[$__interval:])', - datasource=promDatasource, - legendFormat='{{instance}} - intent shared', - format='time_series', - interval='1m', - ), - ], - type: 'timeseries', - title: 'Database wait time / $__interval', - description: 'The time spent waiting for the database lock acquisition.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 10, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - insertNulls: false, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'never', - spanNulls: false, - stacking: { - group: 'A', - mode: 'normal', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - }, - ], - }, - unit: 'µs', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'multi', - sort: 'desc', - }, - }, -}; - -local collectionDeadlocksPanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'increase(mongodb_locks_Collection_deadlockCount_W{job=~"$job",cl_name=~"$cl_name",rs_nm=~"$rs",instance=~"$instance"}[$__interval:])', - datasource=promDatasource, - legendFormat='{{instance}} - exclusive', - format='time_series', - interval='1m', - ), - prometheus.target( - 'increase(mongodb_locks_Collection_deadlockCount_w{job=~"$job",cl_name=~"$cl_name",rs_nm=~"$rs",instance=~"$instance"}[$__interval:])', - datasource=promDatasource, - legendFormat='{{instance}} - intent exclusive', - format='time_series', - interval='1m', - ), - prometheus.target( - 
'increase(mongodb_locks_Collection_deadlockCount_R{job=~"$job",cl_name=~"$cl_name",rs_nm=~"$rs",instance=~"$instance"}[$__interval:])', - datasource=promDatasource, - legendFormat='{{instance}} - shared', - format='time_series', - interval='1m', - ), - prometheus.target( - 'increase(mongodb_locks_Collection_deadlockCount_r{job=~"$job",cl_name=~"$cl_name",rs_nm=~"$rs",instance=~"$instance"}[$__interval:])', - datasource=promDatasource, - legendFormat='{{instance}} - intent shared', - format='time_series', - interval='1m', - ), - ], - type: 'timeseries', - title: 'Collection deadlocks / $__interval', - description: 'The number of deadlocks that have occurred for the collection lock.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 10, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - insertNulls: false, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'never', - spanNulls: false, - stacking: { - group: 'A', - mode: 'normal', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - }, - ], - }, - unit: 'none', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'multi', - sort: 'desc', - }, - }, -}; - -local collectionWaitCountPanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'increase(mongodb_locks_Collection_acquireWaitCount_W{job=~"$job",cl_name=~"$cl_name",rs_nm=~"$rs",instance=~"$instance"}[$__interval:])', - datasource=promDatasource, - legendFormat='{{instance}} - exclusive', - format='time_series', - interval='1m', - ), - prometheus.target( - 
'increase(mongodb_locks_Collection_acquireWaitCount_w{job=~"$job",cl_name=~"$cl_name",rs_nm=~"$rs",instance=~"$instance"}[$__interval:])', - datasource=promDatasource, - legendFormat='{{instance}} - intent exclusive', - format='time_series', - interval='1m', - ), - prometheus.target( - 'increase(mongodb_locks_Collection_acquireWaitCount_R{job=~"$job",cl_name=~"$cl_name",rs_nm=~"$rs",instance=~"$instance"}[$__interval:])', - datasource=promDatasource, - legendFormat='{{instance}} - shared', - format='time_series', - interval='1m', - ), - prometheus.target( - 'increase(mongodb_locks_Collection_acquireWaitCount_r{job=~"$job",cl_name=~"$cl_name",rs_nm=~"$rs",instance=~"$instance"}[$__interval:])', - datasource=promDatasource, - legendFormat='{{instance}} - intent shared', - format='time_series', - interval='1m', - ), - ], - type: 'timeseries', - title: 'Collection wait count / $__interval', - description: 'The number of collection lock acquisitions that had to wait.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 10, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - insertNulls: false, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'never', - spanNulls: false, - stacking: { - group: 'A', - mode: 'normal', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - }, - ], - }, - unit: 'none', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'multi', - sort: 'desc', - }, - }, -}; - -local collectionWaitTimePanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - 
'increase(mongodb_locks_Collection_timeAcquiringMicros_W{job=~"$job",cl_name=~"$cl_name",rs_nm=~"$rs",instance=~"$instance"}[$__interval:])', - datasource=promDatasource, - legendFormat='{{instance}} - exclusive', - format='time_series', - interval='1m', - ), - prometheus.target( - 'increase(mongodb_locks_Collection_timeAcquiringMicros_w{job=~"$job",cl_name=~"$cl_name",rs_nm=~"$rs",instance=~"$instance"}[$__interval:])', - datasource=promDatasource, - legendFormat='{{instance}} - intent exclusive', - format='time_series', - interval='1m', - ), - prometheus.target( - 'increase(mongodb_locks_Collection_timeAcquiringMicros_R{job=~"$job",cl_name=~"$cl_name",rs_nm=~"$rs",instance=~"$instance"}[$__interval:])', - datasource=promDatasource, - legendFormat='{{instance}} - shared', - format='time_series', - interval='1m', - ), - prometheus.target( - 'increase(mongodb_locks_Collection_timeAcquiringMicros_r{job=~"$job",cl_name=~"$cl_name",rs_nm=~"$rs",instance=~"$instance"}[$__interval:])', - datasource=promDatasource, - legendFormat='{{instance}} - intent shared', - format='time_series', - interval='1m', - ), - ], - type: 'timeseries', - title: 'Collection wait time / $__interval', - description: 'The time spent waiting for the collection lock acquisition.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 10, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - insertNulls: false, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'never', - spanNulls: false, - stacking: { - group: 'A', - mode: 'normal', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - }, - ], - }, - unit: 'µs', - }, - 
overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'multi', - sort: 'desc', - }, - }, -}; - -{ - grafanaDashboards+:: { - 'mongodb-atlas-operations-overview.json': - dashboard.new( - 'MongoDB Atlas operations overview', - time_from='%s' % $._config.dashboardPeriod, - tags=($._config.dashboardTags), - timezone='%s' % $._config.dashboardTimezone, - refresh='%s' % $._config.dashboardRefresh, - description='', - uid=dashboardUid, - ) - - .addTemplates( - [ - template.datasource( - promDatasourceName, - 'prometheus', - null, - label='Data Source', - refresh='load' - ), - template.new( - 'job', - promDatasource, - 'label_values(mongodb_network_bytesIn,job)', - label='Job', - refresh=2, - multi=true, - sort=0 - ), - template.new( - 'cl_name', - promDatasource, - 'label_values(mongodb_network_bytesIn{job=~"$job"},cl_name)', - label='Atlas cluster', - refresh=2, - includeAll=true, - multi=true, - allValues='.+', - sort=0 - ), - template.new( - 'rs', - promDatasource, - 'label_values(mongodb_network_bytesIn{cl_name=~"$cl_name"},rs_nm)', - label='Replica set', - refresh=2, - includeAll=true, - multi=true, - allValues='.+', - sort=0 - ), - template.new( - 'instance', - promDatasource, - 'label_values(mongodb_network_bytesIn{rs_nm=~"$rs"},instance)', - label='Node', - refresh=2, - includeAll=true, - multi=true, - allValues='.+', - sort=0 - ), - ] - ) - .addLink(grafana.link.dashboards( - asDropdown=false, - title='MongoDB Atlas dashboards', - includeVars=true, - keepTime=true, - tags=($._config.dashboardTags), - )) - .addPanels( - [ - queryOperationsPanel { gridPos: { h: 8, w: 12, x: 0, y: 0 } }, - insertOperationsPanel { gridPos: { h: 8, w: 12, x: 12, y: 0 } }, - updateOperationsPanel { gridPos: { h: 8, w: 12, x: 0, y: 8 } }, - deleteOperationsPanel { gridPos: { h: 8, w: 12, x: 12, y: 8 } }, - currentConnectionsPanel { gridPos: { h: 8, w: 12, x: 0, y: 16 } }, - 
activeConnectionsPanel { gridPos: { h: 8, w: 12, x: 12, y: 16 } }, - readAndWriteOperationsPanel { gridPos: { h: 8, w: 12, x: 0, y: 24 } }, - readAndWriteLatencyPanel { gridPos: { h: 8, w: 12, x: 12, y: 24 } }, - locksRow { gridPos: { h: 1, w: 24, x: 0, y: 32 } }, - databaseDeadlocksPanel { gridPos: { h: 8, w: 8, x: 0, y: 33 } }, - databaseWaitCountPanel { gridPos: { h: 8, w: 8, x: 8, y: 33 } }, - databaseWaitTimePanel { gridPos: { h: 8, w: 8, x: 16, y: 33 } }, - collectionDeadlocksPanel { gridPos: { h: 8, w: 8, x: 0, y: 41 } }, - collectionWaitCountPanel { gridPos: { h: 8, w: 8, x: 8, y: 41 } }, - collectionWaitTimePanel { gridPos: { h: 8, w: 8, x: 16, y: 41 } }, - ] - ), - }, -} diff --git a/mongodb-atlas-mixin/dashboards/mongodb-atlas-performance-overview.libsonnet b/mongodb-atlas-mixin/dashboards/mongodb-atlas-performance-overview.libsonnet deleted file mode 100644 index e49eb0afd..000000000 --- a/mongodb-atlas-mixin/dashboards/mongodb-atlas-performance-overview.libsonnet +++ /dev/null @@ -1,833 +0,0 @@ -local g = (import 'grafana-builder/grafana.libsonnet'); -local grafana = (import 'grafonnet/grafana.libsonnet'); -local dashboard = grafana.dashboard; -local template = grafana.template; -local prometheus = grafana.prometheus; - -local dashboardUid = 'mongodb-atlas-performance-overview'; - -local promDatasourceName = 'prometheus_datasource'; - -local promDatasource = { - uid: '${%s}' % promDatasourceName, -}; - -local memoryPanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'mongodb_mem_resident{job=~"$job",cl_name=~"$cl_name",rs_nm=~"$rs",instance=~"$instance"}', - datasource=promDatasource, - legendFormat='{{instance}} - RAM', - format='time_series', - ), - prometheus.target( - 'mongodb_mem_virtual{job=~"$job",cl_name=~"$cl_name",rs_nm=~"$rs",instance=~"$instance"}', - datasource=promDatasource, - legendFormat='{{instance}} - virtual', - format='time_series', - ), - ], - type: 'timeseries', - title: 'Memory', - description: 'The 
amount of RAM and virtual memory being used by the database process.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 10, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - insertNulls: false, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'never', - spanNulls: false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - ], - }, - unit: 'mbytes', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'multi', - sort: 'desc', - }, - }, -}; - -local hardwareCPUInterruptServiceTimePanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'increase(hardware_system_cpu_irq_milliseconds{job=~"$job",cl_name=~"$cl_name",instance=~"$instance"}[$__interval:])', - datasource=promDatasource, - legendFormat='{{instance}}', - format='time_series', - interval='1m', - ), - ], - type: 'timeseries', - title: 'Hardware CPU interrupt service time / $__interval', - description: 'The amount of time spent servicing CPU interrupts.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 10, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - insertNulls: false, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'never', - spanNulls: 
false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - ], - }, - unit: 'ms', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'multi', - sort: 'desc', - }, - }, -}; - -local diskSpacePanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'hardware_disk_metrics_disk_space_free_bytes{job=~"$job",cl_name=~"$cl_name",instance=~"$instance"}', - datasource=promDatasource, - legendFormat='{{instance}} - free', - format='time_series', - ), - prometheus.target( - 'hardware_disk_metrics_disk_space_used_bytes{job=~"$job",cl_name=~"$cl_name",instance=~"$instance"}', - datasource=promDatasource, - legendFormat='{{instance}} - used', - format='time_series', - ), - ], - type: 'timeseries', - title: 'Disk space', - description: "The amount of free and used disk space on this node's hardware.", - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 10, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - insertNulls: false, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'never', - spanNulls: false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - ], - }, - unit: 'decbytes', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'multi', - sort: 'desc', - }, - }, 
-}; - -local diskSpaceUtilizationPanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - '(hardware_disk_metrics_disk_space_used_bytes{job=~"$job",cl_name=~"$cl_name",instance=~"$instance"}) / clamp_min((hardware_disk_metrics_disk_space_free_bytes{job=~"$job",cl_name=~"$cl_name",instance=~"$instance"}) + (hardware_disk_metrics_disk_space_used_bytes{job=~"$job",cl_name=~"$cl_name",instance=~"$instance"}), 1)', - datasource=promDatasource, - legendFormat='{{instance}}', - format='time_series', - ), - ], - type: 'timeseries', - title: 'Disk space utilization', - description: "The disk space utilization for this node's hardware.", - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 10, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - insertNulls: false, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'never', - spanNulls: false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - max: 1, - min: 0, - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - ], - }, - unit: 'percentunit', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'multi', - sort: 'desc', - }, - }, -}; - -local networkRequestsPanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'rate(mongodb_network_numRequests{job=~"$job",cl_name=~"$cl_name",rs_nm=~"$rs",instance=~"$instance"}[$__rate_interval])', - datasource=promDatasource, - legendFormat='{{instance}}', - format='time_series', - ), - ], - type: 'timeseries', - title: 'Network requests', - description: 'The 
rate of distinct requests the node has received.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 10, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - insertNulls: false, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'never', - spanNulls: false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - ], - }, - unit: 'reqps', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'multi', - sort: 'desc', - }, - }, -}; - -local slowNetworkRequestsPanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'rate(mongodb_network_numSlowDNSOperations{job=~"$job",cl_name=~"$cl_name",rs_nm=~"$rs",instance=~"$instance"}[$__rate_interval])', - datasource=promDatasource, - legendFormat='{{instance}} - DNS', - format='time_series', - ), - prometheus.target( - 'rate(mongodb_network_numSlowSSLOperations{job=~"$job",cl_name=~"$cl_name",rs_nm=~"$rs",instance=~"$instance"}[$__rate_interval])', - datasource=promDatasource, - legendFormat='{{instance}} - SSL', - format='time_series', - ), - ], - type: 'timeseries', - title: 'Slow network requests', - description: 'The rate of slow DNS and SSL operations received by this node.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 10, - gradientMode: 'none', - hideFrom: { - legend: false, - 
tooltip: false, - viz: false, - }, - insertNulls: false, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'never', - spanNulls: false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - ], - }, - unit: 'reqps', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'multi', - sort: 'desc', - }, - }, -}; - -local networkThroughputPanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'rate(mongodb_network_bytesIn{job=~"$job",cl_name=~"$cl_name",rs_nm=~"$rs",instance=~"$instance"}[$__rate_interval])', - datasource=promDatasource, - legendFormat='{{instance}} - received', - format='time_series', - ), - prometheus.target( - 'rate(mongodb_network_bytesOut{job=~"$job",cl_name=~"$cl_name",rs_nm=~"$rs",instance=~"$instance"}[$__rate_interval])', - datasource=promDatasource, - legendFormat='{{instance}} - sent', - format='time_series', - ), - ], - type: 'timeseries', - title: 'Network throughput', - description: 'The rate of bytes sent and received by the node over a network connection.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 10, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - insertNulls: false, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'never', - spanNulls: false, - stacking: { - group: 'A', - mode: 'normal', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - 
steps: [ - { - color: 'green', - }, - ], - }, - unit: 'Bps', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'multi', - sort: 'desc', - }, - }, -}; - -local hardwareIOPanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'rate(hardware_disk_metrics_read_count{job=~"$job",cl_name=~"$cl_name",instance=~"$instance"}[$__rate_interval])', - datasource=promDatasource, - legendFormat='{{instance}} - reads', - format='time_series', - ), - prometheus.target( - 'rate(hardware_disk_metrics_write_count{job=~"$job",cl_name=~"$cl_name",instance=~"$instance"}[$__rate_interval])', - datasource=promDatasource, - legendFormat='{{instance}} - writes', - format='time_series', - ), - ], - type: 'timeseries', - title: 'Hardware I/O', - description: "The rate of read and write I/O's processed by this node.", - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 10, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - insertNulls: false, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'never', - spanNulls: false, - stacking: { - group: 'A', - mode: 'normal', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - }, - ], - }, - unit: 'iops', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'multi', - sort: 'desc', - }, - }, -}; - -local hardwareIOWaitTimePanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - 
'increase(hardware_disk_metrics_read_time_milliseconds{job=~"$job",cl_name=~"$cl_name",instance=~"$instance"}[$__interval:])', - datasource=promDatasource, - legendFormat='{{instance}} - read', - format='time_series', - interval='1m', - ), - prometheus.target( - 'increase(hardware_disk_metrics_write_time_milliseconds{job=~"$job",cl_name=~"$cl_name",instance=~"$instance"}[$__interval:])', - datasource=promDatasource, - legendFormat='{{instance}} - write', - format='time_series', - interval='1m', - ), - ], - type: 'timeseries', - title: 'Hardware I/O wait time / $__interval', - description: "The amount of time the node has spent waiting for read and write I/O's to process.", - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 10, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - insertNulls: false, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'never', - spanNulls: false, - stacking: { - group: 'A', - mode: 'normal', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - }, - ], - }, - unit: 'ms', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'multi', - sort: 'desc', - }, - }, -}; - -{ - grafanaDashboards+:: { - 'mongodb-atlas-performance-overview.json': - dashboard.new( - 'MongoDB Atlas performance overview', - time_from='%s' % $._config.dashboardPeriod, - tags=($._config.dashboardTags), - timezone='%s' % $._config.dashboardTimezone, - refresh='%s' % $._config.dashboardRefresh, - description='', - uid=dashboardUid, - ) - - .addTemplates( - [ - template.datasource( - 
promDatasourceName, - 'prometheus', - null, - label='Data Source', - refresh='load' - ), - template.new( - 'job', - promDatasource, - 'label_values(mongodb_network_bytesIn,job)', - label='Job', - refresh=2, - multi=true, - sort=0 - ), - template.new( - 'cl_name', - promDatasource, - 'label_values(mongodb_network_bytesIn{job=~"$job"},cl_name)', - label='Atlas cluster', - refresh=2, - includeAll=true, - multi=true, - allValues='.+', - sort=0 - ), - template.new( - 'rs', - promDatasource, - 'label_values(mongodb_network_bytesIn{cl_name=~"$cl_name"},rs_nm)', - label='Replica set', - refresh=2, - includeAll=true, - multi=true, - allValues='.+', - sort=0 - ), - template.new( - 'instance', - promDatasource, - 'label_values(mongodb_network_bytesIn{rs_nm=~"$rs"},instance)', - label='Node', - refresh=2, - includeAll=true, - multi=true, - allValues='.+', - sort=0 - ), - ] - ) - .addLink(grafana.link.dashboards( - asDropdown=false, - title='MongoDB Atlas dashboards', - includeVars=true, - keepTime=true, - tags=($._config.dashboardTags), - )) - .addPanels( - [ - memoryPanel { gridPos: { h: 8, w: 12, x: 0, y: 0 } }, - hardwareCPUInterruptServiceTimePanel { gridPos: { h: 8, w: 12, x: 12, y: 0 } }, - diskSpacePanel { gridPos: { h: 8, w: 12, x: 0, y: 8 } }, - diskSpaceUtilizationPanel { gridPos: { h: 8, w: 12, x: 12, y: 8 } }, - networkRequestsPanel { gridPos: { h: 8, w: 12, x: 0, y: 16 } }, - slowNetworkRequestsPanel { gridPos: { h: 8, w: 12, x: 12, y: 16 } }, - networkThroughputPanel { gridPos: { h: 8, w: 24, x: 0, y: 24 } }, - hardwareIOPanel { gridPos: { h: 8, w: 24, x: 0, y: 32 } }, - hardwareIOWaitTimePanel { gridPos: { h: 8, w: 24, x: 0, y: 40 } }, - ] - ), - }, -} diff --git a/mongodb-atlas-mixin/dashboards/mongodb-atlas-sharding-overview.libsonnet b/mongodb-atlas-mixin/dashboards/mongodb-atlas-sharding-overview.libsonnet deleted file mode 100644 index ee66a02da..000000000 --- a/mongodb-atlas-mixin/dashboards/mongodb-atlas-sharding-overview.libsonnet +++ /dev/null @@ 
-1,1335 +0,0 @@ -local g = (import 'grafana-builder/grafana.libsonnet'); -local grafana = (import 'grafonnet/grafana.libsonnet'); -local dashboard = grafana.dashboard; -local template = grafana.template; -local prometheus = grafana.prometheus; - -local dashboardUid = 'mongodb-atlas-sharding-overview'; - -local promDatasourceName = 'prometheus_datasource'; - -local promDatasource = { - uid: '${%s}' % promDatasourceName, -}; - -local staleConfigsPanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'increase(mongodb_shardingStatistics_countStaleConfigErrors{job=~"$job",cl_name=~"$cl_name",rs_nm=~"$rs",instance=~"$instance"}[$__interval:])', - datasource=promDatasource, - legendFormat='{{instance}}', - format='time_series', - interval='1m', - ), - ], - type: 'timeseries', - title: 'Stale configs / $__interval', - description: 'Number of times that a thread hit a stale config exception and triggered a metadata refresh.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 10, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - insertNulls: false, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'never', - spanNulls: false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - ], - }, - unit: 'none', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'multi', - sort: 'desc', - }, - }, -}; - -local chunkMigrationsPanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - 
'increase(mongodb_shardingStatistics_countRecipientMoveChunkStarted{job=~"$job",cl_name=~"$cl_name",rs_nm=~"$rs",instance=~"$instance"}[$__interval:])', - datasource=promDatasource, - legendFormat='{{instance}}', - format='time_series', - interval='1m', - ), - ], - type: 'timeseries', - title: 'Chunk migrations / $__interval', - description: 'Chunk migration frequency for this node.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 10, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - insertNulls: false, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'never', - spanNulls: false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - ], - }, - unit: 'none', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'multi', - sort: 'desc', - }, - }, -}; - -local docsClonedPanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'increase(mongodb_shardingStatistics_countDocsClonedOnDonor{job=~"$job",cl_name=~"$cl_name",rs_nm=~"$rs",instance=~"$instance"}[$__interval:])', - datasource=promDatasource, - legendFormat='{{instance}} - donor', - format='time_series', - interval='1m', - ), - prometheus.target( - 'increase(mongodb_shardingStatistics_countDocsClonedOnRecipient{job=~"$job",cl_name=~"$cl_name",rs_nm=~"$rs",instance=~"$instance"}[$__interval:])', - datasource=promDatasource, - legendFormat='{{instance}} - recipient', - format='time_series', - interval='1m', - ), - ], - type: 'timeseries', - title: 
'Docs cloned / $__interval', - description: 'The number of documents cloned on this node when it acted as primary for the donor and acted as primary for the recipient.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 10, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - insertNulls: false, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'never', - spanNulls: false, - stacking: { - group: 'A', - mode: 'normal', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - ], - }, - unit: 'none', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'multi', - sort: 'desc', - }, - }, -}; - -local criticalSectionTimePanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'increase(mongodb_shardingStatistics_totalCriticalSectionTimeMillis{job=~"$job",cl_name=~"$cl_name",rs_nm=~"$rs",instance=~"$instance"}[$__interval:])', - datasource=promDatasource, - legendFormat='{{instance}}', - format='time_series', - interval='1m', - ), - ], - type: 'timeseries', - title: 'Critical section time / $__interval', - description: 'The time taken by the catch-up and update metadata phases of a range migration, by this node.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 10, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - insertNulls: 
false, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'never', - spanNulls: false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - ], - }, - unit: 'ms', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'multi', - sort: 'desc', - }, - }, -}; - -local catalogCacheRow = { - datasource: promDatasource, - targets: [ - prometheus.target( - '', - datasource=promDatasource, - legendFormat='', - format='time_series', - ), - ], - type: 'row', - title: 'Catalog cache', - collapsed: false, -}; - -local refreshesStartedPanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'increase(mongodb_shardingStatistics_catalogCache_countIncrementalRefreshesStarted{job=~"$job",cl_name=~"$cl_name",rs_nm=~"$rs",instance=~"$instance"}[$__interval:])', - datasource=promDatasource, - legendFormat='{{instance}} - incremental', - format='time_series', - interval='1m', - ), - prometheus.target( - 'increase(mongodb_shardingStatistics_catalogCache_countFullRefreshesStarted{job=~"$job",cl_name=~"$cl_name",rs_nm=~"$rs",instance=~"$instance"}[$__interval:])', - datasource=promDatasource, - legendFormat='{{instance}} - full', - format='time_series', - interval='1m', - ), - ], - type: 'timeseries', - title: 'Refreshes started / $__interval', - description: 'The number of incremental and full refreshes that have started.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 10, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - 
insertNulls: false, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'never', - spanNulls: false, - stacking: { - group: 'A', - mode: 'normal', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - ], - }, - unit: 'none', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'multi', - sort: 'desc', - }, - }, -}; - -local refreshesFailedPanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'increase(mongodb_shardingStatistics_catalogCache_countFailedRefreshes{job=~"$job",cl_name=~"$cl_name",rs_nm=~"$rs",instance=~"$instance"}[$__interval:])', - datasource=promDatasource, - legendFormat='{{instance}}', - format='time_series', - interval='1m', - ), - ], - type: 'timeseries', - title: 'Refreshes failed / $__interval', - description: 'The number of full and incremental refreshes that have failed.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 10, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - insertNulls: false, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'never', - spanNulls: false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - ], - }, - unit: 'none', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 
'multi', - sort: 'desc', - }, - }, -}; - -local cacheStaleConfigsPanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'increase(mongodb_shardingStatistics_catalogCache_countStaleConfigErrors{job=~"$job",cl_name=~"$cl_name",rs_nm=~"$rs",instance=~"$instance"}[$__interval:])', - datasource=promDatasource, - legendFormat='{{instance}}', - format='time_series', - interval='1m', - ), - ], - type: 'timeseries', - title: 'Cache stale configs / $__interval', - description: 'The number of times that a thread hit a stale config exception for the catalog cache and triggered a metadata refresh.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 10, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - insertNulls: false, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'never', - spanNulls: false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - }, - ], - }, - unit: 'none', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'multi', - sort: 'desc', - }, - }, -}; - -local cacheEntriesPanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'increase(mongodb_shardingStatistics_catalogCache_numDatabaseEntries{job=~"$job",cl_name=~"$cl_name",rs_nm=~"$rs",instance=~"$instance"}[$__interval:])', - datasource=promDatasource, - legendFormat='{{instance}} - database', - format='time_series', - interval='1m', - ), - prometheus.target( - 
'increase(mongodb_shardingStatistics_catalogCache_numCollectionEntries{job=~"$job",cl_name=~"$cl_name",rs_nm=~"$rs",instance=~"$instance"}[$__interval:])', - datasource=promDatasource, - legendFormat='{{instance}} - collection', - format='time_series', - interval='1m', - ), - ], - type: 'timeseries', - title: 'Cache entries / $__interval', - description: 'The number of database and collection entries that are currently in the catalog cache.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 10, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - insertNulls: false, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'never', - spanNulls: false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - }, - ], - }, - unit: 'none', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'multi', - sort: 'desc', - }, - }, -}; - -local cacheRefreshTimePanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'increase(mongodb_shardingStatistics_catalogCache_totalRefreshWaitTimeMicros{job=~"$job",cl_name=~"$cl_name",rs_nm=~"$rs",instance=~"$instance"}[$__interval:])', - datasource=promDatasource, - legendFormat='{{instance}}', - format='time_series', - interval='1m', - ), - ], - type: 'timeseries', - title: 'Cache refresh time / $__interval', - description: 'The amount of time that threads had to wait for a refresh of the metadata.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: 
false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 10, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - insertNulls: false, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'never', - spanNulls: false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - }, - ], - }, - unit: 'µs', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'multi', - sort: 'desc', - }, - }, -}; - -local cacheOperationsBlockedPanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'rate(mongodb_shardingStatistics_catalogCache_operationsBlockedByRefresh_countAllOperations{job=~"$job",cl_name=~"$cl_name",rs_nm=~"$rs",instance=~"$instance"}[$__rate_interval])', - datasource=promDatasource, - legendFormat='{{instance}}', - format='time_series', - ), - ], - type: 'timeseries', - title: 'Cache operations blocked', - description: 'The rate of operations that are blocked by a refresh of the catalog cache. 
Specific to mongos nodes found under replica set "none".', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 10, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - insertNulls: false, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'never', - spanNulls: false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - }, - ], - }, - unit: 'ops', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'multi', - sort: 'desc', - }, - }, -}; - -local shardOperationsRow = { - datasource: promDatasource, - targets: [ - prometheus.target( - '', - datasource=promDatasource, - legendFormat='', - format='time_series', - ), - ], - type: 'row', - title: 'Shard operations', -}; - -local allShardsPanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'rate(mongodb_shardingStatistics_numHostsTargeted_find_allShards{job=~"$job",cl_name=~"$cl_name",rs_nm=~"$rs",instance=~"$instance"}[$__rate_interval])', - datasource=promDatasource, - legendFormat='{{instance}} - find', - format='time_series', - ), - prometheus.target( - 'rate(mongodb_shardingStatistics_numHostsTargeted_insert_allShards{job=~"$job",cl_name=~"$cl_name",rs_nm=~"$rs",instance=~"$instance"}[$__rate_interval])', - datasource=promDatasource, - legendFormat='{{instance}} - insert', - format='time_series', - ), - prometheus.target( - 
'rate(mongodb_shardingStatistics_numHostsTargeted_update_allShards{job=~"$job",cl_name=~"$cl_name",rs_nm=~"$rs",instance=~"$instance"}[$__rate_interval])', - datasource=promDatasource, - legendFormat='{{instance}} - update', - format='time_series', - ), - prometheus.target( - 'rate(mongodb_shardingStatistics_numHostsTargeted_delete_allShards{job=~"$job",cl_name=~"$cl_name",rs_nm=~"$rs",instance=~"$instance"}[$__rate_interval])', - datasource=promDatasource, - legendFormat='{{instance}} - delete', - format='time_series', - ), - prometheus.target( - 'rate(mongodb_shardingStatistics_numHostsTargeted_aggregate_allShards{job=~"$job",cl_name=~"$cl_name",rs_nm=~"$rs",instance=~"$instance"}[$__rate_interval])', - datasource=promDatasource, - legendFormat='{{instance}} - aggregate', - format='time_series', - ), - ], - type: 'timeseries', - title: 'All shards', - description: 'The rate of CRUD operations and aggregation commands run that targeted all shards. Specific to mongos nodes found under replica set "none".', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 10, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - insertNulls: false, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'never', - spanNulls: false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - }, - ], - }, - unit: 'ops', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'right', - showLegend: true, - }, - tooltip: { - mode: 'multi', - sort: 'desc', - }, - }, -}; - -local manyShardsPanel = { - datasource: promDatasource, - 
targets: [ - prometheus.target( - 'rate(mongodb_shardingStatistics_numHostsTargeted_find_manyShards{job=~"$job",cl_name=~"$cl_name",rs_nm=~"$rs",instance=~"$instance"}[$__rate_interval])', - datasource=promDatasource, - legendFormat='{{instance}} - find', - format='time_series', - ), - prometheus.target( - 'rate(mongodb_shardingStatistics_numHostsTargeted_insert_manyShards{job=~"$job",cl_name=~"$cl_name",rs_nm=~"$rs",instance=~"$instance"}[$__rate_interval])', - datasource=promDatasource, - legendFormat='{{instance}} - insert', - format='time_series', - ), - prometheus.target( - 'rate(mongodb_shardingStatistics_numHostsTargeted_update_manyShards{job=~"$job",cl_name=~"$cl_name",rs_nm=~"$rs",instance=~"$instance"}[$__rate_interval])', - datasource=promDatasource, - legendFormat='{{instance}} - update', - format='time_series', - ), - prometheus.target( - 'rate(mongodb_shardingStatistics_numHostsTargeted_delete_manyShards{job=~"$job",cl_name=~"$cl_name",rs_nm=~"$rs",instance=~"$instance"}[$__rate_interval])', - datasource=promDatasource, - legendFormat='{{instance}} - delete', - format='time_series', - ), - prometheus.target( - 'rate(mongodb_shardingStatistics_numHostsTargeted_aggregate_manyShards{job=~"$job",cl_name=~"$cl_name",rs_nm=~"$rs",instance=~"$instance"}[$__rate_interval])', - datasource=promDatasource, - legendFormat='{{instance}} - aggregate', - format='time_series', - ), - ], - type: 'timeseries', - title: 'Many shards', - description: 'The rate of CRUD operations and aggregation commands run that targeted more than 1 shard. 
Specific to mongos nodes found under replica set "none".', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 10, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - insertNulls: false, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'never', - spanNulls: false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - }, - ], - }, - unit: 'ops', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'right', - showLegend: true, - }, - tooltip: { - mode: 'multi', - sort: 'desc', - }, - }, -}; - -local oneShardPanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'rate(mongodb_shardingStatistics_numHostsTargeted_find_oneShard{job=~"$job",cl_name=~"$cl_name",rs_nm=~"$rs",instance=~"$instance"}[$__rate_interval])', - datasource=promDatasource, - legendFormat='{{instance}} - find', - format='time_series', - ), - prometheus.target( - 'rate(mongodb_shardingStatistics_numHostsTargeted_insert_oneShard{job=~"$job",cl_name=~"$cl_name",rs_nm=~"$rs",instance=~"$instance"}[$__rate_interval])', - datasource=promDatasource, - legendFormat='{{instance}} - insert', - format='time_series', - ), - prometheus.target( - 'rate(mongodb_shardingStatistics_numHostsTargeted_update_oneShard{job=~"$job",cl_name=~"$cl_name",rs_nm=~"$rs",instance=~"$instance"}[$__rate_interval])', - datasource=promDatasource, - legendFormat='{{instance}} - update', - format='time_series', - ), - prometheus.target( - 
'rate(mongodb_shardingStatistics_numHostsTargeted_delete_oneShard{job=~"$job",cl_name=~"$cl_name",rs_nm=~"$rs",instance=~"$instance"}[$__rate_interval])', - datasource=promDatasource, - legendFormat='{{instance}} - delete', - format='time_series', - ), - prometheus.target( - 'rate(mongodb_shardingStatistics_numHostsTargeted_aggregate_oneShard{job=~"$job",cl_name=~"$cl_name",rs_nm=~"$rs",instance=~"$instance"}[$__rate_interval])', - datasource=promDatasource, - legendFormat='{{instance}} - aggregate', - format='time_series', - ), - ], - type: 'timeseries', - title: 'One shard', - description: 'The rate of CRUD operations and aggregation commands run that targeted 1 shard. Specific to mongos nodes found under replica set "none".', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 10, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - insertNulls: false, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'never', - spanNulls: false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - }, - ], - }, - unit: 'ops', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'right', - showLegend: true, - }, - tooltip: { - mode: 'multi', - sort: 'desc', - }, - }, -}; - -local unshardedPanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'rate(mongodb_shardingStatistics_numHostsTargeted_find_unsharded{job=~"$job",cl_name=~"$cl_name",rs_nm=~"$rs",instance=~"$instance"}[$__rate_interval])', - datasource=promDatasource, - legendFormat='{{instance}} - find', - format='time_series', - ), - 
prometheus.target( - 'rate(mongodb_shardingStatistics_numHostsTargeted_insert_unsharded{job=~"$job",cl_name=~"$cl_name",rs_nm=~"$rs",instance=~"$instance"}[$__rate_interval])', - datasource=promDatasource, - legendFormat='{{instance}} - insert', - format='time_series', - ), - prometheus.target( - 'rate(mongodb_shardingStatistics_numHostsTargeted_update_unsharded{job=~"$job",cl_name=~"$cl_name",rs_nm=~"$rs",instance=~"$instance"}[$__rate_interval])', - datasource=promDatasource, - legendFormat='{{instance}} - update', - format='time_series', - ), - prometheus.target( - 'rate(mongodb_shardingStatistics_numHostsTargeted_delete_unsharded{job=~"$job",cl_name=~"$cl_name",rs_nm=~"$rs",instance=~"$instance"}[$__rate_interval])', - datasource=promDatasource, - legendFormat='{{instance}} - delete', - format='time_series', - ), - prometheus.target( - 'rate(mongodb_shardingStatistics_numHostsTargeted_aggregate_unsharded{job=~"$job",cl_name=~"$cl_name",rs_nm=~"$rs",instance=~"$instance"}[$__rate_interval])', - datasource=promDatasource, - legendFormat='{{instance}} - aggregate', - format='time_series', - ), - ], - type: 'timeseries', - title: 'Unsharded', - description: 'The rate of CRUD operations and aggregation commands run on an unsharded collection. 
Specific to mongos nodes found under replica set "none".', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 10, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - insertNulls: false, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'never', - spanNulls: false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - }, - ], - }, - unit: 'ops', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'right', - showLegend: true, - }, - tooltip: { - mode: 'multi', - sort: 'desc', - }, - }, -}; - -{ - grafanaDashboards+:: - if $._config.enableShardingOverview then { - 'mongodb-atlas-sharding-overview.json': - dashboard.new( - 'MongoDB Atlas sharding overview', - time_from='%s' % $._config.dashboardPeriod, - tags=($._config.dashboardTags), - timezone='%s' % $._config.dashboardTimezone, - refresh='%s' % $._config.dashboardRefresh, - description='', - uid=dashboardUid, - ) - - .addTemplates( - [ - template.datasource( - promDatasourceName, - 'prometheus', - null, - label='Data Source', - refresh='load' - ), - template.new( - 'job', - promDatasource, - 'label_values(mongodb_network_bytesIn,job)', - label='Job', - refresh=2, - multi=true, - sort=0 - ), - template.new( - 'cl_name', - promDatasource, - 'label_values(mongodb_network_bytesIn{job=~"$job"},cl_name)', - label='Atlas cluster', - refresh=2, - includeAll=true, - multi=true, - allValues='.+', - sort=0 - ), - template.new( - 'rs', - promDatasource, - 'label_values(mongodb_network_bytesIn{cl_name=~"$cl_name"},rs_nm)', - label='Replica set', - refresh=2, - 
includeAll=true, - multi=true, - allValues='.+', - sort=0 - ), - template.new( - 'instance', - promDatasource, - 'label_values(mongodb_network_bytesIn{rs_nm=~"$rs"},instance)', - label='Node', - refresh=2, - includeAll=true, - multi=true, - allValues='.+', - sort=0 - ), - ] - ) - .addLink(grafana.link.dashboards( - asDropdown=false, - title='MongoDB Atlas dashboards', - includeVars=true, - keepTime=true, - tags=($._config.dashboardTags), - )) - .addPanels( - [ - staleConfigsPanel { gridPos: { h: 8, w: 12, x: 0, y: 0 } }, - chunkMigrationsPanel { gridPos: { h: 8, w: 12, x: 12, y: 0 } }, - docsClonedPanel { gridPos: { h: 8, w: 12, x: 0, y: 8 } }, - criticalSectionTimePanel { gridPos: { h: 8, w: 12, x: 12, y: 8 } }, - catalogCacheRow { gridPos: { h: 1, w: 24, x: 0, y: 16 } }, - refreshesStartedPanel { gridPos: { h: 8, w: 12, x: 0, y: 17 } }, - refreshesFailedPanel { gridPos: { h: 8, w: 12, x: 12, y: 17 } }, - cacheStaleConfigsPanel { gridPos: { h: 8, w: 6, x: 0, y: 25 } }, - cacheEntriesPanel { gridPos: { h: 8, w: 6, x: 6, y: 25 } }, - cacheRefreshTimePanel { gridPos: { h: 8, w: 6, x: 12, y: 25 } }, - cacheOperationsBlockedPanel { gridPos: { h: 8, w: 6, x: 18, y: 25 } }, - shardOperationsRow { gridPos: { h: 1, w: 24, x: 0, y: 33 } }, - allShardsPanel { gridPos: { h: 8, w: 12, x: 0, y: 34 } }, - manyShardsPanel { gridPos: { h: 8, w: 12, x: 12, y: 34 } }, - oneShardPanel { gridPos: { h: 8, w: 12, x: 0, y: 42 } }, - unshardedPanel { gridPos: { h: 8, w: 12, x: 12, y: 42 } }, - ] - ), - } else {}, -} diff --git a/mongodb-atlas-mixin/dashboards_out/mongodb-atlas-cluster-overview.json b/mongodb-atlas-mixin/dashboards_out/mongodb-atlas-cluster-overview.json index 8e7cfba03..8564b3790 100644 --- a/mongodb-atlas-mixin/dashboards_out/mongodb-atlas-cluster-overview.json +++ b/mongodb-atlas-mixin/dashboards_out/mongodb-atlas-cluster-overview.json @@ -1,60 +1,52 @@ { - "__inputs": [ ], - "__requires": [ ], "annotations": { "list": [ ] }, - "description": "", - "editable": false, - 
"gnetId": null, - "graphTooltip": 0, - "hideControls": false, - "id": null, + "description": "Overview of MongoDB Atlas cluster metrics.", "links": [ { - "asDropdown": false, - "icon": "external link", - "includeVars": true, "keepTime": true, - "tags": [ - "mongodb-atlas-mixin" - ], - "targetBlank": false, - "title": "MongoDB Atlas dashboards", - "type": "dashboards", - "url": "" + "title": "MongoDB Atlas elections overview", + "type": "link", + "url": "/d/mongodb-atlas-elections-overview" + }, + { + "keepTime": true, + "title": "MongoDB Atlas operations overview", + "type": "link", + "url": "/d/mongodb-atlas-operations-overview" + }, + { + "keepTime": true, + "title": "MongoDB Atlas performance overview", + "type": "link", + "url": "/d/mongodb-atlas-performance-overview" + }, + { + "keepTime": true, + "title": "MongoDB Atlas sharding overview", + "type": "link", + "url": "/d/mongodb-atlas-sharding-overview" } ], "panels": [ { "collapsed": false, - "datasource": { - "uid": "${prometheus_datasource}" - }, "gridPos": { "h": 1, - "w": 24, + "w": 0, "x": 0, "y": 0 }, - "id": 2, - "targets": [ - { - "datasource": { - "uid": "${prometheus_datasource}" - }, - "expr": "", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "" - } - ], + "id": 1, + "panels": [ ], "title": "Shard", "type": "row" }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "datasource", + "uid": "-- Mixed --" }, "description": "An inventory table for shard nodes in the environment.", "fieldConfig": { @@ -62,14 +54,6 @@ "color": { "mode": "thresholds" }, - "custom": { - "align": "center", - "cellOptions": { - "type": "auto" - }, - "filterable": false, - "inspect": false - }, "mappings": [ { "options": { @@ -84,16 +68,7 @@ }, "type": "value" } - ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] - } + ] }, "overrides": [ { @@ -193,35 +168,24 @@ ] }, "gridPos": { - "h": 6, + "h": 8, "w": 24, "x": 0, "y": 1 }, - "id": 3, - 
"options": { - "cellHeight": "md", - "footer": { - "countRows": false, - "enablePagination": false, - "fields": "", - "reducer": [ - "sum" - ], - "show": false - }, - "showHeader": true - }, - "pluginVersion": "10.2.0-59981", + "id": 2, + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { - "uid": "${prometheus_datasource}" - }, - "expr": "mongodb_network_bytesIn{job=~\"$job\",cl_name=~\"$cl_name\"}", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "" + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "mongodb_network_bytesIn{job=\"integrations/mongodb-atlas\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\", rs_nm=~\"$rs_nm\"}", + "format": "table", + "instant": true, + "legendFormat": "", + "refId": "Shard node representative metric" } ], "title": "Shard nodes", @@ -293,33 +257,21 @@ }, { "collapsed": false, - "datasource": { - "uid": "${prometheus_datasource}" - }, "gridPos": { "h": 1, - "w": 24, + "w": 0, "x": 0, - "y": 7 + "y": 9 }, - "id": 4, - "targets": [ - { - "datasource": { - "uid": "${prometheus_datasource}" - }, - "expr": "", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "" - } - ], + "id": 3, + "panels": [ ], "title": "Config", "type": "row" }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "datasource", + "uid": "-- Mixed --" }, "description": "An inventory table for config nodes in the environment.", "fieldConfig": { @@ -327,14 +279,6 @@ "color": { "mode": "thresholds" }, - "custom": { - "align": "center", - "cellOptions": { - "type": "auto" - }, - "filterable": false, - "inspect": false - }, "mappings": [ { "options": { @@ -349,16 +293,7 @@ }, "type": "value" } - ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] - } + ] }, "overrides": [ { @@ -458,35 +393,24 @@ ] }, "gridPos": { - "h": 6, + "h": 8, "w": 24, "x": 0, - "y": 8 + "y": 10 }, - "id": 5, - "options": { - "cellHeight": "md", - "footer": { - 
"countRows": false, - "enablePagination": false, - "fields": "", - "reducer": [ - "sum" - ], - "show": false - }, - "showHeader": true - }, - "pluginVersion": "10.2.0-59981", + "id": 4, + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { - "uid": "${prometheus_datasource}" - }, - "expr": "mongodb_network_bytesIn{job=~\"$job\",cl_name=~\"$cl_name\"}", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "" + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "mongodb_network_bytesIn{job=\"integrations/mongodb-atlas\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\", rs_nm=~\"$rs_nm\"}", + "format": "table", + "instant": true, + "legendFormat": "", + "refId": "Config inventory representative metric" } ], "title": "Config nodes", @@ -558,33 +482,21 @@ }, { "collapsed": false, - "datasource": { - "uid": "${prometheus_datasource}" - }, "gridPos": { "h": 1, - "w": 24, + "w": 0, "x": 0, - "y": 14 + "y": 18 }, - "id": 6, - "targets": [ - { - "datasource": { - "uid": "${prometheus_datasource}" - }, - "expr": "", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "" - } - ], + "id": 5, + "panels": [ ], "title": "mongos", "type": "row" }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "datasource", + "uid": "-- Mixed --" }, "description": "An inventory table for mongos nodes in the environment.", "fieldConfig": { @@ -592,14 +504,6 @@ "color": { "mode": "thresholds" }, - "custom": { - "align": "center", - "cellOptions": { - "type": "auto" - }, - "filterable": false, - "inspect": false - }, "mappings": [ { "options": { @@ -614,16 +518,7 @@ }, "type": "value" } - ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] - } + ] }, "overrides": [ { @@ -689,35 +584,24 @@ ] }, "gridPos": { - "h": 6, + "h": 8, "w": 24, "x": 0, - "y": 15 + "y": 19 }, - "id": 7, - "options": { - "cellHeight": "md", - "footer": { - "countRows": false, - "enablePagination": 
false, - "fields": "", - "reducer": [ - "sum" - ], - "show": false - }, - "showHeader": true - }, - "pluginVersion": "10.2.0-59981", + "id": 6, + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { - "uid": "${prometheus_datasource}" - }, - "expr": "mongodb_network_bytesIn{job=~\"$job\",cl_name=~\"$cl_name\"}", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "" + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "mongodb_network_bytesIn{job=\"integrations/mongodb-atlas\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\", rs_nm=~\"$rs_nm\"}", + "format": "table", + "instant": true, + "legendFormat": "", + "refId": "Mongos node representative metric" } ], "title": "mongos nodes", @@ -789,121 +673,78 @@ }, { "collapsed": false, - "datasource": { - "uid": "${prometheus_datasource}" - }, "gridPos": { "h": 1, - "w": 24, + "w": 0, "x": 0, - "y": 21 + "y": 27 }, - "id": 8, - "targets": [ - { - "datasource": { - "uid": "${prometheus_datasource}" - }, - "expr": "", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "" - } - ], + "id": 7, + "panels": [ ], "title": "Performance", "type": "row" }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, "description": "The number of read and write I/O's processed.", "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, + "fillOpacity": 30, + "gradientMode": "opacity", + "lineInterpolation": "smooth", + "lineWidth": 2, "showPoints": "never", - "spanNulls": false, "stacking": { - 
"group": "A", "mode": "normal" - }, - "thresholdsStyle": { - "mode": "off" } }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - }, "unit": "iops" - }, - "overrides": [ ] + } }, "gridPos": { - "h": 6, - "w": 6, + "h": 8, + "w": 12, "x": 0, - "y": 22 + "y": 28 }, - "id": 9, + "id": 8, "options": { "legend": { "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true + "displayMode": "list" }, "tooltip": { "mode": "multi", "sort": "desc" } }, + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, - "expr": "sum (rate(hardware_disk_metrics_read_count{job=~\"$job\",cl_name=~\"$cl_name\"}[$__rate_interval])) by (cl_name)", + "expr": "sum(rate(hardware_disk_metrics_read_count{job=\"integrations/mongodb-atlas\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\",cl_name=~\"$cl_name\"}[$__rate_interval])) by (cl_name)", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{cl_name}} - reads" + "instant": false, + "legendFormat": "{{cl_name}} - reads", + "refId": "Disk read operations" }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, - "expr": "sum (rate(hardware_disk_metrics_write_count{job=~\"$job\",cl_name=~\"$cl_name\"}[$__rate_interval])) by (cl_name)", + "expr": "sum(rate(hardware_disk_metrics_write_count{job=\"integrations/mongodb-atlas\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\",cl_name=~\"$cl_name\"}[$__rate_interval])) by (cl_name)", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{cl_name}} - writes" + "instant": false, + "legendFormat": "{{cl_name}} - writes", + "refId": "Disk write operations" } ], "title": "Hardware I/O", @@ -911,97 +752,67 @@ }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, "description": 
"The amount of time spent waiting for I/O requests.", "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, + "fillOpacity": 30, + "gradientMode": "opacity", + "lineInterpolation": "smooth", + "lineWidth": 2, "showPoints": "never", - "spanNulls": false, "stacking": { - "group": "A", "mode": "normal" - }, - "thresholdsStyle": { - "mode": "off" } }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - }, "unit": "ms" - }, - "overrides": [ ] + } }, "gridPos": { - "h": 6, - "w": 6, - "x": 6, - "y": 22 + "h": 8, + "w": 12, + "x": 12, + "y": 28 }, - "id": 10, + "id": 9, "options": { "legend": { "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true + "displayMode": "list" }, "tooltip": { "mode": "multi", "sort": "desc" } }, + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, - "expr": "sum (increase(hardware_disk_metrics_read_time_milliseconds{job=~\"$job\",cl_name=~\"$cl_name\"}[$__interval:])) by (cl_name)", + "expr": "sum(increase(hardware_disk_metrics_read_time_milliseconds{job=\"integrations/mongodb-atlas\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\",cl_name=~\"$cl_name\"}[$__interval:] offset -$__interval)) by (cl_name)", "format": "time_series", - "interval": "1m", - "intervalFactor": 2, - "legendFormat": "{{cl_name}} - reads" + "instant": false, + "interval": "2m", + "legendFormat": "{{cl_name}} - reads", + "refId": "Disk read I/O 
time" }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, - "expr": "sum (increase(hardware_disk_metrics_write_time_milliseconds{job=~\"$job\",cl_name=~\"$cl_name\"}[$__interval:])) by (cl_name)", + "expr": "sum(increase(hardware_disk_metrics_write_time_milliseconds{job=\"integrations/mongodb-atlas\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\",cl_name=~\"$cl_name\"}[$__interval:] offset -$__interval)) by (cl_name)", "format": "time_series", - "interval": "1m", - "intervalFactor": 2, - "legendFormat": "{{cl_name}} - writes" + "instant": false, + "interval": "2m", + "legendFormat": "{{cl_name}} - writes", + "refId": "Disk write I/O time" } ], "title": "Hardware I/O wait time / $__interval", @@ -1009,87 +820,52 @@ }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, "description": "The amount of time spent servicing CPU interrupts.", "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] + "fillOpacity": 30, + "gradientMode": "opacity", + "lineInterpolation": "smooth", + "lineWidth": 2, + "showPoints": "never" }, "unit": "ms" - }, - "overrides": [ ] + } }, "gridPos": { - "h": 6, - "w": 6, - "x": 12, - "y": 22 + "h": 8, + "w": 12, + "x": 0, + "y": 36 }, - 
"id": 11, + "id": 10, "options": { "legend": { "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true + "displayMode": "list" }, "tooltip": { "mode": "multi", "sort": "desc" } }, + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, - "expr": "sum (increase(hardware_system_cpu_irq_milliseconds{job=~\"$job\",cl_name=~\"$cl_name\"}[$__interval:])) by (cl_name)", + "expr": "sum(increase(hardware_system_cpu_irq_milliseconds{job=\"integrations/mongodb-atlas\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\",cl_name=~\"$cl_name\"}[$__interval:] offset -$__interval)) by (cl_name)", "format": "time_series", - "interval": "1m", - "intervalFactor": 2, - "legendFormat": "{{cl_name}}" + "instant": false, + "interval": "2m", + "legendFormat": "{{cl_name}}", + "refId": "CPU interrupt service time" } ], "title": "Hardware CPU interrupt service time / $__interval", @@ -1097,95 +873,62 @@ }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, "description": "The amount of RAM and virtual memory being used.", "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] + "fillOpacity": 30, + "gradientMode": "opacity", + 
"lineInterpolation": "smooth", + "lineWidth": 2, + "showPoints": "never" }, "unit": "mbytes" - }, - "overrides": [ ] + } }, "gridPos": { - "h": 6, - "w": 6, - "x": 18, - "y": 22 + "h": 8, + "w": 12, + "x": 12, + "y": 36 }, - "id": 12, + "id": 11, "options": { "legend": { "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true + "displayMode": "list" }, "tooltip": { "mode": "multi", "sort": "desc" } }, + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, - "expr": "sum (mongodb_mem_resident{job=~\"$job\",cl_name=~\"$cl_name\"}) by (cl_name)", + "expr": "sum(mongodb_mem_resident{job=\"integrations/mongodb-atlas\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\", rs_nm=~\"$rs_nm\"}) by (cl_name)", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{cl_name}} - RAM" + "instant": false, + "legendFormat": "{{cl_name}} - RAM", + "refId": "Memory resident (RAM)" }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, - "expr": "sum (mongodb_mem_virtual{job=~\"$job\",cl_name=~\"$cl_name\"}) by (cl_name)", + "expr": "sum(mongodb_mem_virtual{job=\"integrations/mongodb-atlas\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\", rs_nm=~\"$rs_nm\"}) by (cl_name)", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{cl_name}} - virtual" + "instant": false, + "legendFormat": "{{cl_name}} - virtual", + "refId": "Memory virtual" } ], "title": "Memory used", @@ -1193,88 +936,53 @@ }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, "description": "The percentage of hardware space used.", "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - 
"drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } + "fillOpacity": 30, + "gradientMode": "opacity", + "lineInterpolation": "smooth", + "lineWidth": 2, + "showPoints": "never" }, - "mappings": [ ], "max": 1, "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - }, "unit": "percentunit" - }, - "overrides": [ ] + } }, "gridPos": { - "h": 6, - "w": 6, + "h": 8, + "w": 24, "x": 0, - "y": 28 + "y": 44 }, - "id": 13, + "id": 12, "options": { "legend": { "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true + "displayMode": "list" }, "tooltip": { "mode": "multi", "sort": "desc" } }, + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, - "expr": "(sum (hardware_disk_metrics_disk_space_used_bytes{job=~\"$job\",cl_name=~\"$cl_name\"}) by(cl_name)) / (clamp_min(sum (hardware_disk_metrics_disk_space_free_bytes{job=~\"$job\",cl_name=~\"$cl_name\"}) by(cl_name) + sum (hardware_disk_metrics_disk_space_used_bytes{job=~\"$job\",cl_name=~\"$cl_name\"}) by(cl_name),0.1))", + "expr": "(sum without (disk_name) (hardware_disk_metrics_disk_space_used_bytes{job=\"integrations/mongodb-atlas\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\"})) / clamp_min((sum without (disk_name) (hardware_disk_metrics_disk_space_used_bytes{job=\"integrations/mongodb-atlas\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\"})) + (sum without (disk_name) 
(hardware_disk_metrics_disk_space_free_bytes{job=\"integrations/mongodb-atlas\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\"})), 1)", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{cl_name}}" + "instant": false, + "legendFormat": "{{cl_name}}", + "refId": "Disk space utilization" } ], "title": "Disk space usage", @@ -1282,86 +990,51 @@ }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, "description": "The number of distinct requests that the server has received.", "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] + "fillOpacity": 30, + "gradientMode": "opacity", + "lineInterpolation": "smooth", + "lineWidth": 2, + "showPoints": "never" }, "unit": "reqps" - }, - "overrides": [ ] + } }, "gridPos": { - "h": 6, - "w": 6, - "x": 6, - "y": 28 + "h": 8, + "w": 12, + "x": 0, + "y": 52 }, - "id": 14, + "id": 13, "options": { "legend": { "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true + "displayMode": "list" }, "tooltip": { "mode": "multi", "sort": "desc" } }, + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, - "expr": "sum 
(rate(mongodb_network_numRequests{job=~\"$job\",cl_name=~\"$cl_name\"}[$__rate_interval])) by (cl_name)", + "expr": "sum(rate(mongodb_network_numRequests{job=\"integrations/mongodb-atlas\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\", rs_nm=~\"$rs_nm\"}[$__rate_interval])) by (cl_name)", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{cl_name}}" + "instant": false, + "legendFormat": "{{cl_name}}", + "refId": "Network requests" } ], "title": "Network requests", @@ -1369,192 +1042,128 @@ }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, "description": "The number of bytes sent and received over network connections.", "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, + "fillOpacity": 30, + "gradientMode": "opacity", + "lineInterpolation": "smooth", + "lineWidth": 2, "showPoints": "never", - "spanNulls": false, "stacking": { - "group": "A", "mode": "normal" - }, - "thresholdsStyle": { - "mode": "off" } }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - }, "unit": "Bps" - }, - "overrides": [ ] + } }, "gridPos": { - "h": 6, - "w": 6, + "h": 8, + "w": 12, "x": 12, - "y": 28 + "y": 52 }, - "id": 15, + "id": 14, "options": { "legend": { "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true + "displayMode": "list" }, "tooltip": { "mode": "multi", "sort": "desc" } }, + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { - "uid": 
"${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, - "expr": "sum (rate(mongodb_network_bytesIn{job=~\"$job\",cl_name=~\"$cl_name\"}[$__rate_interval])) by (cl_name)", + "expr": "sum(rate(mongodb_network_bytesIn{job=\"integrations/mongodb-atlas\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\", rs_nm=~\"$rs_nm\"}[$__rate_interval])) by (cl_name)", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{cl_name}} - received" + "instant": false, + "legendFormat": "{{cl_name}} - received", + "refId": "Network bytes received" }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, - "expr": "sum (rate(mongodb_network_bytesOut{job=~\"$job\",cl_name=~\"$cl_name\"}[$__rate_interval])) by (cl_name)", + "expr": "sum(rate(mongodb_network_bytesOut{job=\"integrations/mongodb-atlas\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\", rs_nm=~\"$rs_nm\"}[$__rate_interval])) by (cl_name)", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{cl_name}} - sent" + "instant": false, + "legendFormat": "{{cl_name}} - sent", + "refId": "Network bytes sent" } ], "title": "Network throughput", - "transformations": [ ], "type": "timeseries" }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, "description": "The rate of DNS and SSL operations that took longer than 1 second.", "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": 
false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] + "fillOpacity": 30, + "gradientMode": "opacity", + "lineInterpolation": "smooth", + "lineWidth": 2, + "showPoints": "never" }, "unit": "reqps" - }, - "overrides": [ ] + } }, "gridPos": { - "h": 6, - "w": 6, - "x": 18, - "y": 28 + "h": 8, + "w": 24, + "x": 0, + "y": 60 }, - "id": 16, + "id": 15, "options": { "legend": { "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true + "displayMode": "list" }, "tooltip": { "mode": "multi", "sort": "desc" } }, + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, - "expr": "sum (rate(mongodb_network_numSlowDNSOperations{job=~\"$job\",cl_name=~\"$cl_name\"}[$__rate_interval])) by (cl_name)", + "expr": "sum(rate(mongodb_network_numSlowDNSOperations{job=\"integrations/mongodb-atlas\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\", rs_nm=~\"$rs_nm\"}[$__rate_interval])) by (cl_name)", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{cl_name}} - DNS" + "instant": false, + "legendFormat": "{{cl_name}} - DNS", + "refId": "Slow DNS operations" }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, - "expr": "sum (rate(mongodb_network_numSlowSSLOperations{job=~\"$job\",cl_name=~\"$cl_name\"}[$__rate_interval])) by (cl_name)", + "expr": "rate(mongodb_network_numSlowSSLOperations{job=\"integrations/mongodb-atlas\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\", rs_nm=~\"$rs_nm\"}[$__rate_interval])", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{cl_name}} - SSL" + "instant": false, + "legendFormat": "{{cl_name}} - SSL", + "refId": "Slow SSL operations" } ], "title": "Slow requests", @@ 
-1562,112 +1171,64 @@ }, { "collapsed": false, - "datasource": { - "uid": "${prometheus_datasource}" - }, "gridPos": { "h": 1, - "w": 24, + "w": 0, "x": 0, - "y": 34 + "y": 68 }, - "id": 17, - "targets": [ - { - "datasource": { - "uid": "${prometheus_datasource}" - }, - "expr": "", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "" - } - ], + "id": 16, + "panels": [ ], "title": "Operations", "type": "row" }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, "description": "The rate of incoming connections to the cluster created.", "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] + "fillOpacity": 30, + "gradientMode": "opacity", + "lineInterpolation": "smooth", + "lineWidth": 2, + "showPoints": "never" }, "unit": "conns/s" - }, - "overrides": [ ] + } }, "gridPos": { - "h": 6, - "w": 12, + "h": 8, + "w": 24, "x": 0, - "y": 35 + "y": 69 }, - "id": 18, + "id": 17, "options": { "legend": { "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true + "displayMode": "list" }, "tooltip": { "mode": "multi", "sort": "desc" } }, + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, - "expr": 
"sum (rate(mongodb_connections_totalCreated{job=~\"$job\",cl_name=~\"$cl_name\"}[$__rate_interval])) by (cl_name)", + "expr": "sum(rate(mongodb_connections_totalCreated{job=\"integrations/mongodb-atlas\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\"}[$__rate_interval])) by (cl_name)", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{cl_name}}" + "instant": false, + "legendFormat": "{{cl_name}}", + "refId": "Connections created" } ], "title": "Connections", @@ -1675,95 +1236,65 @@ }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, "description": "The number of read and write operations.", "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, + "fillOpacity": 30, + "gradientMode": "opacity", + "lineInterpolation": "smooth", + "lineWidth": 2, "showPoints": "never", - "spanNulls": false, "stacking": { - "group": "A", "mode": "normal" - }, - "thresholdsStyle": { - "mode": "off" } }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - }, "unit": "ops" - }, - "overrides": [ ] + } }, "gridPos": { - "h": 12, - "w": 6, - "x": 12, - "y": 35 + "h": 8, + "w": 12, + "x": 0, + "y": 77 }, - "id": 19, + "id": 18, "options": { "legend": { "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true + "displayMode": "list" }, "tooltip": { "mode": "multi", "sort": "desc" } }, + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { - "uid": "${prometheus_datasource}" + "type": 
"prometheus", + "uid": "${datasource}" }, - "expr": "sum (rate(mongodb_opLatencies_reads_ops{job=~\"$job\",cl_name=~\"$cl_name\"}[$__rate_interval])) by (cl_name)", + "expr": "sum(rate(mongodb_opLatencies_reads_ops{job=\"integrations/mongodb-atlas\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\", rs_nm=~\"$rs_nm\"}[$__rate_interval])) by (cl_name)", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{cl_name}} - reads" + "instant": false, + "legendFormat": "{{cl_name}} - reads", + "refId": "Read operation count" }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, - "expr": "sum (rate(mongodb_opLatencies_writes_ops{job=~\"$job\",cl_name=~\"$cl_name\"}[$__rate_interval])) by (cl_name)", + "expr": "sum(rate(mongodb_opLatencies_writes_ops{job=\"integrations/mongodb-atlas\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\", rs_nm=~\"$rs_nm\"}[$__rate_interval])) by (cl_name)", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{cl_name}} - writes" + "instant": false, + "legendFormat": "{{cl_name}} - writes", + "refId": "Write operation count" } ], "title": "Read/Write operations", @@ -1771,96 +1302,84 @@ }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "datasource", + "uid": "-- Mixed --" }, "description": "The number of insert, query, update, and delete operations.", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - } - }, - "mappings": [ ], - "unit": "none" - }, - "overrides": [ ] - }, "gridPos": { - "h": 12, - "w": 6, - "x": 18, - "y": 35 + "h": 8, + "w": 12, + "x": 12, + "y": 77 }, - "id": 20, + "id": 19, "options": { - "displayLabels": [ ], "legend": { "displayMode": "table", "placement": "bottom", - "showLegend": true, "values": [ "value" ] }, - "pieType": "pie", "reduceOptions": { "calcs": [ "lastNotNull" - ], - 
"fields": "", - "values": false + ] }, "tooltip": { "mode": "multi", "sort": "desc" } }, + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, - "expr": "sum (increase(mongodb_opcounters_insert{job=~\"$job\",cl_name=~\"$cl_name\"}[$__interval:])) by (cl_name)", + "expr": "sum(increase(mongodb_opcounters_insert{job=\"integrations/mongodb-atlas\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\", rs_nm=~\"$rs_nm\"}[$__interval:] offset -$__interval)) by (cl_name)", "format": "time_series", - "interval": "1m", - "intervalFactor": 2, - "legendFormat": "{{cl_name}} - insert" + "instant": false, + "interval": "2m", + "legendFormat": "{{cl_name}} - insert", + "refId": "Insert operations" }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, - "expr": "sum (increase(mongodb_opcounters_query{job=~\"$job\",cl_name=~\"$cl_name\"}[$__interval:])) by (cl_name)", + "expr": "sum(increase(mongodb_opcounters_query{job=\"integrations/mongodb-atlas\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\", rs_nm=~\"$rs_nm\"}[$__interval:] offset -$__interval)) by (cl_name)", "format": "time_series", - "interval": "1m", - "intervalFactor": 2, - "legendFormat": "{{cl_name}} - query" + "instant": false, + "interval": "2m", + "legendFormat": "{{cl_name}} - query", + "refId": "Query operations" }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, - "expr": "sum (increase(mongodb_opcounters_update{job=~\"$job\",cl_name=~\"$cl_name\"}[$__interval:])) by (cl_name)", + "expr": "sum(increase(mongodb_opcounters_update{job=\"integrations/mongodb-atlas\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\", rs_nm=~\"$rs_nm\"}[$__interval:] offset -$__interval)) by (cl_name)", "format": "time_series", - "interval": "1m", - "intervalFactor": 2, - "legendFormat": "{{cl_name}} 
- update" + "instant": false, + "interval": "2m", + "legendFormat": "{{cl_name}} - update", + "refId": "Update operations" }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, - "expr": "sum (increase(mongodb_opcounters_delete{job=~\"$job\",cl_name=~\"$cl_name\"}[$__interval:])) by (cl_name)", + "expr": "sum(increase(mongodb_opcounters_delete{job=\"integrations/mongodb-atlas\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\", rs_nm=~\"$rs_nm\"}[$__interval:] offset -$__interval)) by (cl_name)", "format": "time_series", - "interval": "1m", - "intervalFactor": 2, - "legendFormat": "{{cl_name}} - delete" + "instant": false, + "interval": "2m", + "legendFormat": "{{cl_name}} - delete", + "refId": "Delete operations" } ], "title": "Operations", @@ -1868,97 +1387,67 @@ }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, "description": "The latency for read and write operations.", "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, + "fillOpacity": 30, + "gradientMode": "opacity", + "lineInterpolation": "smooth", + "lineWidth": 2, "showPoints": "never", - "spanNulls": false, "stacking": { - "group": "A", "mode": "normal" - }, - "thresholdsStyle": { - "mode": "off" } }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - }, "unit": "µs" - }, - "overrides": [ ] + } }, "gridPos": { - "h": 6, - "w": 12, + "h": 8, + "w": 24, "x": 0, - "y": 41 + "y": 85 }, - "id": 
21, + "id": 20, "options": { "legend": { "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true + "displayMode": "list" }, "tooltip": { "mode": "multi", "sort": "desc" } }, + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, - "expr": "sum (increase(mongodb_opLatencies_reads_latency{job=~\"$job\",cl_name=~\"$cl_name\"}[$__interval:])) by (cl_name)", + "expr": "sum(increase(mongodb_opLatencies_reads_latency{job=\"integrations/mongodb-atlas\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\", rs_nm=~\"$rs_nm\"}[$__interval:] offset -$__interval)) by (cl_name)", "format": "time_series", - "interval": "1m", - "intervalFactor": 2, - "legendFormat": "{{cl_name}} - reads" + "instant": false, + "interval": "2m", + "legendFormat": "{{cl_name}} - reads", + "refId": "Read operation latency" }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, - "expr": "sum (increase(mongodb_opLatencies_writes_latency{job=~\"$job\",cl_name=~\"$cl_name\"}[$__interval:])) by (cl_name)", + "expr": "increase(mongodb_opLatencies_writes_latency{job=\"integrations/mongodb-atlas\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\", rs_nm=~\"$rs_nm\"}[$__interval:] offset -$__interval)", "format": "time_series", - "interval": "1m", - "intervalFactor": 2, - "legendFormat": "{{cl_name}} - writes" + "instant": false, + "interval": "2m", + "legendFormat": "{{cl_name}} - writes", + "refId": "Write operation latency" } ], "title": "Read/Write latency / $__interval", @@ -1966,121 +1455,77 @@ }, { "collapsed": false, - "datasource": { - "uid": "${prometheus_datasource}" - }, "gridPos": { "h": 1, - "w": 24, + "w": 0, "x": 0, - "y": 47 + "y": 93 }, - "id": 22, - "targets": [ - { - "datasource": { - "uid": "${prometheus_datasource}" - }, - "expr": "", - "format": "time_series", - "intervalFactor": 2, - 
"legendFormat": "" - } - ], + "id": 21, + "panels": [ ], "title": "Locks", "type": "row" }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, "description": "The number of reads and writes queued because of a lock.", "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, + "fillOpacity": 30, + "gradientMode": "opacity", + "lineInterpolation": "smooth", + "lineWidth": 2, "showPoints": "never", - "spanNulls": false, "stacking": { - "group": "A", "mode": "normal" - }, - "thresholdsStyle": { - "mode": "off" } - }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - }, - "unit": "none" - }, - "overrides": [ ] + } + } }, "gridPos": { - "h": 6, + "h": 8, "w": 12, "x": 0, - "y": 48 + "y": 94 }, - "id": 23, + "id": 22, "options": { "legend": { "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true + "displayMode": "list" }, "tooltip": { "mode": "multi", "sort": "desc" } }, + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, - "expr": "sum (mongodb_globalLock_currentQueue_readers{job=~\"$job\",cl_name=~\"$cl_name\"}) by (cl_name)", + "expr": "sum(mongodb_globalLock_currentQueue_readers{job=\"integrations/mongodb-atlas\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\", rs_nm=~\"$rs_nm\"}) by (cl_name)", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{cl_name}} - reads" + 
"instant": false, + "legendFormat": "{{cl_name}} - reads", + "refId": "Global lock queue - readers" }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, - "expr": "sum (mongodb_globalLock_currentQueue_writers{job=~\"$job\",cl_name=~\"$cl_name\"}) by (cl_name)", + "expr": "sum(mongodb_globalLock_currentQueue_writers{job=\"integrations/mongodb-atlas\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\", rs_nm=~\"$rs_nm\"}) by (cl_name)", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{cl_name}} - writes" + "instant": false, + "legendFormat": "{{cl_name}} - writes", + "refId": "Global lock queue - writers" } ], "title": "Current queue", @@ -2088,95 +1533,64 @@ }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, "description": "The number of reads and writes being actively performed by connected clients.", "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, + "fillOpacity": 30, + "gradientMode": "opacity", + "lineInterpolation": "smooth", + "lineWidth": 2, "showPoints": "never", - "spanNulls": false, "stacking": { - "group": "A", "mode": "normal" - }, - "thresholdsStyle": { - "mode": "off" } - }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - }, - "unit": "none" - }, - "overrides": [ ] + } + } }, "gridPos": { - "h": 6, + "h": 8, "w": 12, "x": 12, - "y": 48 + "y": 94 }, - "id": 24, + "id": 23, "options": { "legend": { "calcs": [ ], - 
"displayMode": "list", - "placement": "bottom", - "showLegend": true + "displayMode": "list" }, "tooltip": { "mode": "multi", "sort": "desc" } }, + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, - "expr": "sum (mongodb_globalLock_activeClients_readers{job=~\"$job\",cl_name=~\"$cl_name\"}) by (cl_name)", + "expr": "sum(mongodb_globalLock_activeClients_readers{job=\"integrations/mongodb-atlas\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\"}) by (cl_name)", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{cl_name}} - reads" + "instant": false, + "legendFormat": "{{cl_name}} - reads", + "refId": "Global lock active clients - readers" }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, - "expr": "sum (mongodb_globalLock_activeClients_writers{job=~\"$job\",cl_name=~\"$cl_name\"}) by (cl_name)", + "expr": "sum(mongodb_globalLock_activeClients_writers{job=\"integrations/mongodb-atlas\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\"}) by (cl_name)", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{cl_name}} - writes" + "instant": false, + "legendFormat": "{{cl_name}} - writes", + "refId": "Global lock active clients - writers" } ], "title": "Active client operations", @@ -2184,117 +1598,91 @@ }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, "description": "The number of deadlocks for database level locks.", "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - 
"lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, + "fillOpacity": 30, + "gradientMode": "opacity", + "lineInterpolation": "smooth", + "lineWidth": 2, "showPoints": "never", - "spanNulls": false, "stacking": { - "group": "A", "mode": "normal" - }, - "thresholdsStyle": { - "mode": "off" } - }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - }, - "unit": "none" - }, - "overrides": [ ] + } + } }, "gridPos": { - "h": 6, + "h": 8, "w": 12, "x": 0, - "y": 54 + "y": 102 }, - "id": 25, + "id": 24, "options": { "legend": { "calcs": [ ], "displayMode": "list", - "placement": "right", - "showLegend": true + "placement": "right" }, "tooltip": { "mode": "multi", "sort": "desc" } }, + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, - "expr": "sum (increase(mongodb_locks_Database_deadlockCount_W{job=~\"$job\",cl_name=~\"$cl_name\"}[$__interval:])) by (cl_name)", + "expr": "sum(increase(mongodb_locks_Database_deadlockCount_W{job=\"integrations/mongodb-atlas\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\"}[$__interval:] offset -$__interval)) by (cl_name)", "format": "time_series", - "interval": "1m", - "intervalFactor": 2, - "legendFormat": "{{cl_name}} - exclusive" + "instant": false, + "interval": "2m", + "legendFormat": "{{cl_name}} - exclusive", + "refId": "Database exclusive lock deadlocks" }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, - "expr": "sum (increase(mongodb_locks_Database_deadlockCount_w{job=~\"$job\",cl_name=~\"$cl_name\"}[$__interval:])) by (cl_name)", + "expr": "sum(increase(mongodb_locks_Database_deadlockCount_w{job=\"integrations/mongodb-atlas\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\"}[$__interval:] offset -$__interval)) by (cl_name)", "format": 
"time_series", - "interval": "1m", - "intervalFactor": 2, - "legendFormat": "{{cl_name}} - intent exclusive" + "instant": false, + "interval": "2m", + "legendFormat": "{{cl_name}} - intent exclusive", + "refId": "Database intent exclusive lock deadlocks" }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, - "expr": "sum (increase(mongodb_locks_Database_deadlockCount_R{job=~\"$job\",cl_name=~\"$cl_name\"}[$__interval:])) by (cl_name)", + "expr": "sum(increase(mongodb_locks_Database_deadlockCount_R{job=\"integrations/mongodb-atlas\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\"}[$__interval:] offset -$__interval)) by (cl_name)", "format": "time_series", - "interval": "1m", - "intervalFactor": 2, - "legendFormat": "{{cl_name}} - shared" + "instant": false, + "interval": "2m", + "legendFormat": "{{cl_name}} - shared", + "refId": "Database shared lock deadlocks" }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, - "expr": "sum (increase(mongodb_locks_Database_deadlockCount_r{job=~\"$job\",cl_name=~\"$cl_name\"}[$__interval:])) by (cl_name)", + "expr": "sum(increase(mongodb_locks_Database_deadlockCount_r{job=\"integrations/mongodb-atlas\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\"}[$__interval:] offset -$__interval)) by (cl_name)", "format": "time_series", - "interval": "1m", - "intervalFactor": 2, - "legendFormat": "{{cl_name}} - intent shared" + "instant": false, + "interval": "2m", + "legendFormat": "{{cl_name}} - intent shared", + "refId": "Database intent shared lock deadlocks" } ], "title": "Database deadlocks / $__interval", @@ -2302,117 +1690,91 @@ }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, "description": "The number of times lock acquisitions encounter waits for database level locks.", "fieldConfig": { "defaults": { - "color": { - "mode": 
"palette-classic" - }, "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, + "fillOpacity": 30, + "gradientMode": "opacity", + "lineInterpolation": "smooth", + "lineWidth": 2, "showPoints": "never", - "spanNulls": false, "stacking": { - "group": "A", "mode": "normal" - }, - "thresholdsStyle": { - "mode": "off" } - }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - }, - "unit": "none" - }, - "overrides": [ ] + } + } }, "gridPos": { - "h": 6, + "h": 8, "w": 12, "x": 12, - "y": 54 + "y": 102 }, - "id": 26, + "id": 25, "options": { "legend": { "calcs": [ ], "displayMode": "list", - "placement": "right", - "showLegend": true + "placement": "right" }, "tooltip": { "mode": "multi", "sort": "desc" } }, + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, - "expr": "sum (increase(mongodb_locks_Database_acquireWaitCount_W{job=~\"$job\",cl_name=~\"$cl_name\"}[$__interval:])) by (cl_name)", + "expr": "sum(increase(mongodb_locks_Database_acquireWaitCount_W{job=\"integrations/mongodb-atlas\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\"}[$__interval:] offset -$__interval)) by (cl_name)", "format": "time_series", - "interval": "1m", - "intervalFactor": 2, - "legendFormat": "{{cl_name}} - exclusive" + "instant": false, + "interval": "2m", + "legendFormat": "{{cl_name}} - exclusive", + "refId": "Database exclusive lock wait count" }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, - "expr": "sum 
(increase(mongodb_locks_Database_acquireWaitCount_w{job=~\"$job\",cl_name=~\"$cl_name\"}[$__interval:])) by (cl_name)", + "expr": "sum(increase(mongodb_locks_Database_acquireWaitCount_w{job=\"integrations/mongodb-atlas\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\"}[$__interval:] offset -$__interval)) by (cl_name)", "format": "time_series", - "interval": "1m", - "intervalFactor": 2, - "legendFormat": "{{cl_name}} - intent exclusive" + "instant": false, + "interval": "2m", + "legendFormat": "{{cl_name}} - intent exclusive", + "refId": "Database intent exclusive lock wait count" }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, - "expr": "sum (increase(mongodb_locks_Database_acquireWaitCount_R{job=~\"$job\",cl_name=~\"$cl_name\"}[$__interval:])) by (cl_name)", + "expr": "sum(increase(mongodb_locks_Database_acquireWaitCount_R{job=\"integrations/mongodb-atlas\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\"}[$__interval:] offset -$__interval)) by (cl_name)", "format": "time_series", - "interval": "1m", - "intervalFactor": 2, - "legendFormat": "{{cl_name}} - shared" + "instant": false, + "interval": "2m", + "legendFormat": "{{cl_name}} - shared", + "refId": "Database shared lock wait count" }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, - "expr": "sum (increase(mongodb_locks_Database_acquireWaitCount_r{job=~\"$job\",cl_name=~\"$cl_name\"}[$__interval:])) by (cl_name)", + "expr": "sum(increase(mongodb_locks_Database_acquireWaitCount_r{job=\"integrations/mongodb-atlas\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\"}[$__interval:] offset -$__interval)) by (cl_name)", "format": "time_series", - "interval": "1m", - "intervalFactor": 2, - "legendFormat": "{{cl_name}} - intent shared" + "instant": false, + "interval": "2m", + "legendFormat": "{{cl_name}} - intent shared", + "refId": "Database intent shared lock wait 
count" } ], "title": "Database waits acquiring lock / $__interval", @@ -2420,68 +1782,76 @@ } ], "refresh": "1m", - "rows": [ ], - "schemaVersion": 14, - "style": "dark", + "schemaVersion": 39, "tags": [ "mongodb-atlas-mixin" ], "templating": { "list": [ { - "current": { }, - "hide": 0, - "label": "Data Source", - "name": "prometheus_datasource", - "options": [ ], + "label": "Data source", + "name": "datasource", "query": "prometheus", - "refresh": 1, "regex": "", "type": "datasource" }, { - "allValue": null, - "current": { }, + "allValue": ".+", "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, - "hide": 0, - "includeAll": false, + "includeAll": true, "label": "Job", "multi": true, "name": "job", - "options": [ ], - "query": "label_values(mongodb_network_bytesIn,job)", + "query": "label_values(mongodb_network_bytesIn{job=\"integrations/mongodb-atlas\"}, job)", "refresh": 2, - "regex": "", - "sort": 0, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false + "sort": 1, + "type": "query" }, { "allValue": ".+", - "current": { }, "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, - "hide": 0, "includeAll": true, "label": "Atlas cluster", "multi": true, "name": "cl_name", - "options": [ ], - "query": "label_values(mongodb_network_bytesIn{job=~\"$job\"},cl_name)", + "query": "label_values(mongodb_network_bytesIn{job=\"integrations/mongodb-atlas\",job=~\"$job\"}, cl_name)", "refresh": 2, - "regex": "", - "sort": 0, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false + "sort": 1, + "type": "query" + }, + { + "allValue": ".+", + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "includeAll": true, + "label": "Instance", + "multi": true, + "name": "instance", + "query": 
"label_values(mongodb_network_bytesIn{job=\"integrations/mongodb-atlas\",job=~\"$job\",cl_name=~\"$cl_name\"}, instance)", + "refresh": 2, + "sort": 1, + "type": "query" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "includeAll": true, + "label": "Replica set", + "multi": true, + "name": "rs_nm", + "query": "label_values(mongodb_network_bytesIn, rs_nm)", + "refresh": 2, + "type": "query" } ] }, @@ -2489,33 +1859,7 @@ "from": "now-30m", "to": "now" }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, "timezone": "default", "title": "MongoDB Atlas cluster overview", - "uid": "mongodb-atlas-cluster-overview", - "version": 0 + "uid": "mongodb-atlas-cluster-overview" } \ No newline at end of file diff --git a/mongodb-atlas-mixin/dashboards_out/mongodb-atlas-elections-overview.json b/mongodb-atlas-mixin/dashboards_out/mongodb-atlas-elections-overview.json index 1a76b3003..6430f84ca 100644 --- a/mongodb-atlas-mixin/dashboards_out/mongodb-atlas-elections-overview.json +++ b/mongodb-atlas-mixin/dashboards_out/mongodb-atlas-elections-overview.json @@ -1,125 +1,110 @@ { - "__inputs": [ ], - "__requires": [ ], "annotations": { "list": [ ] }, - "description": "", - "editable": false, - "gnetId": null, - "graphTooltip": 0, - "hideControls": false, - "id": null, + "description": "Overview of MongoDB Atlas election metrics.", "links": [ { - "asDropdown": false, - "icon": "external link", - "includeVars": true, "keepTime": true, - "tags": [ - "mongodb-atlas-mixin" - ], - "targetBlank": false, - "title": "MongoDB Atlas dashboards", - "type": "dashboards", - "url": "" + "title": "MongoDB Atlas cluster overview", + "type": "link", + "url": "/d/mongodb-atlas-cluster-overview" + }, + { + "keepTime": true, + "title": "MongoDB Atlas operations overview", + "type": 
"link", + "url": "/d/mongodb-atlas-operations-overview" + }, + { + "keepTime": true, + "title": "MongoDB Atlas performance overview", + "type": "link", + "url": "/d/mongodb-atlas-performance-overview" + }, + { + "keepTime": true, + "title": "MongoDB Atlas sharding overview", + "type": "link", + "url": "/d/mongodb-atlas-sharding-overview" } ], "panels": [ + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 0, + "x": 0, + "y": 0 + }, + "id": 1, + "panels": [ ], + "title": "Elections", + "type": "row" + }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, "description": "The number of elections called and elections won by the node when the primary stepped down.", "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] + "fillOpacity": 30, + "gradientMode": "opacity", + "lineInterpolation": "smooth", + "lineWidth": 2, + "showPoints": "never" }, "unit": "none" - }, - "overrides": [ ] + } }, "gridPos": { "h": 8, "w": 12, "x": 0, - "y": 0 + "y": 1 }, "id": 2, "options": { "legend": { + "asTable": true, "calcs": [ ], "displayMode": "list", - "placement": "bottom", - "showLegend": true + "placement": "right" }, "tooltip": { "mode": "multi", "sort": "desc" } }, + "pluginVersion": "v11.0.0", "targets": [ { 
"datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, - "expr": "increase(mongodb_electionMetrics_stepUpCmd_called{job=~\"$job\",cl_name=~\"$cl_name\",rs_nm=~\"$rs\",instance=~\"$instance\"}[$__interval:])", + "expr": "increase(mongodb_electionMetrics_stepUpCmd_called{job=\"integrations/mongodb-atlas\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\", rs_nm=~\"$rs_nm\"}[$__interval:] offset -$__interval)", "format": "time_series", - "interval": "1m", - "intervalFactor": 2, - "legendFormat": "{{instance}} - called" + "instant": false, + "interval": "2m", + "legendFormat": "{{instance}} - called", + "refId": "Step-up elections called" }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, - "expr": "increase(mongodb_electionMetrics_stepUpCmd_successful{job=~\"$job\",cl_name=~\"$cl_name\",rs_nm=~\"$rs\",instance=~\"$instance\"}[$__interval:])", + "expr": "increase(mongodb_electionMetrics_stepUpCmd_successful{job=\"integrations/mongodb-atlas\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\", rs_nm=~\"$rs_nm\"}[$__interval:] offset -$__interval)", "format": "time_series", - "interval": "1m", - "intervalFactor": 2, - "legendFormat": "{{instance}} - successful" + "instant": false, + "interval": "2m", + "legendFormat": "{{instance}} - successful", + "refId": "Step-up elections successful" } ], "title": "Step-up elections / $__interval", @@ -127,98 +112,66 @@ }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, "description": "The number of elections called and elections won by the node when it had a higher priority than the primary node.", "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - 
"gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] + "fillOpacity": 30, + "gradientMode": "opacity", + "lineInterpolation": "smooth", + "lineWidth": 2, + "showPoints": "never" }, "unit": "none" - }, - "overrides": [ ] + } }, "gridPos": { "h": 8, "w": 12, "x": 12, - "y": 0 + "y": 1 }, "id": 3, "options": { "legend": { + "asTable": true, "calcs": [ ], "displayMode": "list", - "placement": "bottom", - "showLegend": true + "placement": "right" }, "tooltip": { "mode": "multi", "sort": "desc" } }, + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, - "expr": "increase(mongodb_electionMetrics_priorityTakeover_called{job=~\"$job\",cl_name=~\"$cl_name\",rs_nm=~\"$rs\",instance=~\"$instance\"}[$__interval:])", + "expr": "increase(mongodb_electionMetrics_priorityTakeover_called{job=\"integrations/mongodb-atlas\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\", rs_nm=~\"$rs_nm\"}[$__interval:] offset -$__interval)", "format": "time_series", - "interval": "1m", - "intervalFactor": 2, - "legendFormat": "{{instance}} - called" + "instant": false, + "interval": "2m", + "legendFormat": "{{instance}} - called", + "refId": "Priority takeover elections called" }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, - "expr": 
"increase(mongodb_electionMetrics_priorityTakeover_successful{job=~\"$job\",cl_name=~\"$cl_name\",rs_nm=~\"$rs\",instance=~\"$instance\"}[$__interval:])", + "expr": "increase(mongodb_electionMetrics_priorityTakeover_successful{job=\"integrations/mongodb-atlas\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\", rs_nm=~\"$rs_nm\"}[$__interval:] offset -$__interval)", "format": "time_series", - "interval": "1m", - "intervalFactor": 2, - "legendFormat": "{{instance}} - successful" + "instant": false, + "interval": "2m", + "legendFormat": "{{instance}} - successful", + "refId": "Priority takeover elections successful" } ], "title": "Priority elections / $__interval", @@ -226,98 +179,66 @@ }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, "description": "The number of elections called and elections won by the node when it was more current than the primary node.", "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] + "fillOpacity": 30, + "gradientMode": "opacity", + "lineInterpolation": "smooth", + "lineWidth": 2, + "showPoints": "never" }, "unit": "none" - }, - "overrides": [ ] + } }, "gridPos": { "h": 8, "w": 12, "x": 0, - "y": 8 + "y": 9 }, "id": 4, "options": { "legend": { + "asTable": true, 
"calcs": [ ], "displayMode": "list", - "placement": "bottom", - "showLegend": true + "placement": "right" }, "tooltip": { "mode": "multi", "sort": "desc" } }, + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, - "expr": "increase(mongodb_electionMetrics_catchUpTakeover_called{job=~\"$job\",cl_name=~\"$cl_name\",rs_nm=~\"$rs\",instance=~\"$instance\"}[$__interval:])", + "expr": "increase(mongodb_electionMetrics_catchUpTakeover_called{job=\"integrations/mongodb-atlas\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\", rs_nm=~\"$rs_nm\"}[$__interval:] offset -$__interval)", "format": "time_series", - "interval": "1m", - "intervalFactor": 2, - "legendFormat": "{{instance}} - called" + "instant": false, + "interval": "2m", + "legendFormat": "{{instance}} - called", + "refId": "Catch-up takeover elections called" }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, - "expr": "increase(mongodb_electionMetrics_catchUpTakeover_successful{job=~\"$job\",cl_name=~\"$cl_name\",rs_nm=~\"$rs\",instance=~\"$instance\"}[$__interval:])", + "expr": "increase(mongodb_electionMetrics_catchUpTakeover_successful{job=\"integrations/mongodb-atlas\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\", rs_nm=~\"$rs_nm\"}[$__interval:] offset -$__interval)", "format": "time_series", - "interval": "1m", - "intervalFactor": 2, - "legendFormat": "{{instance}} - successful" + "instant": false, + "interval": "2m", + "legendFormat": "{{instance}} - successful", + "refId": "Catch-up takeover elections successful" } ], "title": "Takeover elections / $__interval", @@ -325,212 +246,132 @@ }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, "description": "The number of elections called and elections won by the node when the time it took to reach the primary node exceeded the 
election timeout limit.", "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] + "fillOpacity": 30, + "gradientMode": "opacity", + "lineInterpolation": "smooth", + "lineWidth": 2, + "showPoints": "never" }, "unit": "none" - }, - "overrides": [ ] + } }, "gridPos": { "h": 8, "w": 12, "x": 12, - "y": 8 + "y": 9 }, "id": 5, "options": { "legend": { + "asTable": true, "calcs": [ ], "displayMode": "list", - "placement": "bottom", - "showLegend": true + "placement": "right" }, "tooltip": { "mode": "multi", "sort": "desc" } }, + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, - "expr": "increase(mongodb_electionMetrics_electionTimeout_called{job=~\"$job\",cl_name=~\"$cl_name\",rs_nm=~\"$rs\",instance=~\"$instance\"}[$__interval:])", + "expr": "increase(mongodb_electionMetrics_electionTimeout_called{job=\"integrations/mongodb-atlas\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\", rs_nm=~\"$rs_nm\"}[$__interval:] offset -$__interval)", "format": "time_series", - "interval": "1m", - "intervalFactor": 2, - "legendFormat": "{{instance}} - called" + "instant": false, + "interval": "2m", + "legendFormat": "{{instance}} - called", + "refId": "Election timeout elections 
called" }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, - "expr": "increase(mongodb_electionMetrics_electionTimeout_successful{job=~\"$job\",cl_name=~\"$cl_name\",rs_nm=~\"$rs\",instance=~\"$instance\"}[$__interval:])", + "expr": "increase(mongodb_electionMetrics_electionTimeout_successful{job=\"integrations/mongodb-atlas\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\", rs_nm=~\"$rs_nm\"}[$__interval:] offset -$__interval)", "format": "time_series", - "interval": "1m", - "intervalFactor": 2, - "legendFormat": "{{instance}} - successful" + "instant": false, + "interval": "2m", + "legendFormat": "{{instance}} - successful", + "refId": "Election timeout elections successful" } ], "title": "Timeout elections / $__interval", "type": "timeseries" }, { - "datasource": { - "uid": "${prometheus_datasource}" - }, + "collapsed": false, "gridPos": { "h": 1, - "w": 24, + "w": 0, "x": 0, - "y": 16 + "y": 17 }, "id": 6, - "targets": [ - { - "datasource": { - "uid": "${prometheus_datasource}" - }, - "expr": "", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "" - } - ], + "panels": [ ], "title": "Catch-ups", "type": "row" }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, "description": "The number of times the node had to catch up to the highest known oplog entry.", "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": 
"A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] + "fillOpacity": 30, + "gradientMode": "opacity", + "lineInterpolation": "smooth", + "lineWidth": 2, + "showPoints": "never" }, "unit": "none" - }, - "overrides": [ ] + } }, "gridPos": { "h": 8, "w": 12, "x": 0, - "y": 17 + "y": 18 }, "id": 7, "options": { "legend": { "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true + "displayMode": "list" }, "tooltip": { "mode": "multi", "sort": "desc" } }, + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, - "expr": "increase(mongodb_electionMetrics_numCatchUps{job=~\"$job\",cl_name=~\"$cl_name\",rs_nm=~\"$rs\",instance=~\"$instance\"}[$__interval:])", + "expr": "increase(mongodb_electionMetrics_numCatchUps{job=\"integrations/mongodb-atlas\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\", rs_nm=~\"$rs_nm\"}[$__interval:] offset -$__interval)", "format": "time_series", - "interval": "1m", - "intervalFactor": 2, - "legendFormat": "{{instance}}" + "instant": false, + "interval": "2m", + "legendFormat": "{{instance}}", + "refId": "Number of catch-ups" } ], "title": "Catch-ups / $__interval", @@ -538,88 +379,52 @@ }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, "description": "The number of times the node skipped the catch up process when it was the newly elected primary.", "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - 
"insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] + "fillOpacity": 30, + "gradientMode": "opacity", + "lineInterpolation": "smooth", + "lineWidth": 2, + "showPoints": "never" }, "unit": "none" - }, - "overrides": [ ] + } }, "gridPos": { "h": 8, "w": 12, "x": 12, - "y": 17 + "y": 18 }, "id": 8, "options": { "legend": { "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true + "displayMode": "list" }, "tooltip": { "mode": "multi", "sort": "desc" } }, + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, - "expr": "increase(mongodb_electionMetrics_numCatchUpsSkipped{job=~\"$job\",cl_name=~\"$cl_name\",rs_nm=~\"$rs\",instance=~\"$instance\"}[$__interval:])", + "expr": "increase(mongodb_electionMetrics_numCatchUpsSkipped{job=\"integrations/mongodb-atlas\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\", rs_nm=~\"$rs_nm\"}[$__interval:] offset -$__interval)", "format": "time_series", - "interval": "1m", - "intervalFactor": 2, - "legendFormat": "{{instance}}" + "instant": false, + "interval": "2m", + "legendFormat": "{{instance}}", + "refId": "Number of catch-ups skipped" } ], "title": "Catch-ups skipped / $__interval", @@ -627,87 +432,52 @@ }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, "description": "The number of times the node succeeded in catching up when it was the newly elected primary.", "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, "custom": { - "axisCenteredZero": false, - 
"axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] + "fillOpacity": 30, + "gradientMode": "opacity", + "lineInterpolation": "smooth", + "lineWidth": 2, + "showPoints": "never" }, "unit": "none" - }, - "overrides": [ ] + } }, "gridPos": { "h": 8, "w": 12, "x": 0, - "y": 25 + "y": 26 }, "id": 9, "options": { "legend": { "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true + "displayMode": "list" }, "tooltip": { "mode": "multi", "sort": "desc" } }, + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, - "expr": "increase(mongodb_electionMetrics_numCatchUpsSucceeded{job=~\"$job\",cl_name=~\"$cl_name\",rs_nm=~\"$rs\",instance=~\"$instance\"}[$__interval:])", + "expr": "increase(mongodb_electionMetrics_numCatchUpsSucceeded{job=\"integrations/mongodb-atlas\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\", rs_nm=~\"$rs_nm\"}[$__interval:] offset -$__interval)", "format": "time_series", - "interval": "1m", - "intervalFactor": 2, - "legendFormat": "{{instance}}" + "instant": false, + "interval": "2m", + "legendFormat": "{{instance}}", + "refId": "Number of catch-ups succeeded" } ], "title": "Catch-ups succeeded / $__interval", @@ -715,87 +485,52 @@ }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, 
"description": "The number of times the node failed in catching up when it was the newly elected primary.", "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] + "fillOpacity": 30, + "gradientMode": "opacity", + "lineInterpolation": "smooth", + "lineWidth": 2, + "showPoints": "never" }, "unit": "none" - }, - "overrides": [ ] + } }, "gridPos": { "h": 8, "w": 12, "x": 12, - "y": 25 + "y": 26 }, "id": 10, "options": { "legend": { "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true + "displayMode": "list" }, "tooltip": { "mode": "multi", "sort": "desc" } }, + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, - "expr": "increase(mongodb_electionMetrics_numCatchUpsFailedWithError{job=~\"$job\",cl_name=~\"$cl_name\",rs_nm=~\"$rs\",instance=~\"$instance\"}[$__interval:])", + "expr": "increase(mongodb_electionMetrics_numCatchUpsFailedWithError{job=\"integrations/mongodb-atlas\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\", rs_nm=~\"$rs_nm\"}[$__interval:] offset -$__interval)", "format": "time_series", - "interval": "1m", - "intervalFactor": 2, - "legendFormat": "{{instance}}" + "instant": false, + "interval": "2m", + "legendFormat": 
"{{instance}}", + "refId": "Number of catch-ups failed with error" } ], "title": "Catch-ups failed / $__interval", @@ -803,87 +538,52 @@ }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, "description": "The number of times the node timed out during the catch-up process when it was the newly elected primary.", "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] + "fillOpacity": 30, + "gradientMode": "opacity", + "lineInterpolation": "smooth", + "lineWidth": 2, + "showPoints": "never" }, "unit": "none" - }, - "overrides": [ ] + } }, "gridPos": { "h": 8, "w": 12, "x": 0, - "y": 33 + "y": 34 }, "id": 11, "options": { "legend": { "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true + "displayMode": "list" }, "tooltip": { "mode": "multi", "sort": "desc" } }, + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, - "expr": "increase(mongodb_electionMetrics_numCatchUpsTimedOut{job=~\"$job\",cl_name=~\"$cl_name\",rs_nm=~\"$rs\",instance=~\"$instance\"}[$__interval:])", + "expr": 
"increase(mongodb_electionMetrics_numCatchUpsTimedOut{job=\"integrations/mongodb-atlas\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\", rs_nm=~\"$rs_nm\"}[$__interval:] offset -$__interval)", "format": "time_series", - "interval": "1m", - "intervalFactor": 2, - "legendFormat": "{{instance}}" + "instant": false, + "interval": "2m", + "legendFormat": "{{instance}}", + "refId": "Number of catch-up timeouts" } ], "title": "Catch-up timeouts / $__interval", @@ -891,86 +591,51 @@ }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, "description": "The average number of operations done during the catch-up process when this node is the newly elected primary.", "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] + "fillOpacity": 30, + "gradientMode": "opacity", + "lineInterpolation": "smooth", + "lineWidth": 2, + "showPoints": "never" }, "unit": "none" - }, - "overrides": [ ] + } }, "gridPos": { "h": 8, "w": 12, "x": 12, - "y": 33 + "y": 34 }, "id": 12, "options": { "legend": { "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true + "displayMode": "list" }, "tooltip": { "mode": "multi", "sort": "desc" } }, + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { - "uid": 
"${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, - "expr": "mongodb_electionMetrics_averageCatchUpOps{job=~\"$job\",cl_name=~\"$cl_name\",rs_nm=~\"$rs\",instance=~\"$instance\"}", + "expr": "mongodb_electionMetrics_averageCatchUpOps{job=\"integrations/mongodb-atlas\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\", rs_nm=~\"$rs_nm\"}", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}}" + "instant": false, + "legendFormat": "{{instance}}", + "refId": "Average catch-up operations" } ], "title": "Average catch-up operations", @@ -978,112 +643,76 @@ } ], "refresh": "1m", - "rows": [ ], - "schemaVersion": 14, - "style": "dark", + "schemaVersion": 39, "tags": [ "mongodb-atlas-mixin" ], "templating": { "list": [ { - "current": { }, - "hide": 0, - "label": "Data Source", - "name": "prometheus_datasource", - "options": [ ], + "label": "Data source", + "name": "datasource", "query": "prometheus", - "refresh": 1, "regex": "", "type": "datasource" }, { - "allValue": null, - "current": { }, + "allValue": ".+", "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, - "hide": 0, - "includeAll": false, + "includeAll": true, "label": "Job", "multi": true, "name": "job", - "options": [ ], - "query": "label_values(mongodb_network_bytesIn,job)", + "query": "label_values(mongodb_network_bytesIn{job=\"integrations/mongodb-atlas\"}, job)", "refresh": 2, - "regex": "", - "sort": 0, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false + "sort": 1, + "type": "query" }, { "allValue": ".+", - "current": { }, "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, - "hide": 0, "includeAll": true, "label": "Atlas cluster", "multi": true, "name": "cl_name", - "options": [ ], - "query": "label_values(mongodb_network_bytesIn{job=~\"$job\"},cl_name)", + "query": 
"label_values(mongodb_network_bytesIn{job=\"integrations/mongodb-atlas\",job=~\"$job\"}, cl_name)", "refresh": 2, - "regex": "", - "sort": 0, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false + "sort": 1, + "type": "query" }, { "allValue": ".+", - "current": { }, "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, - "hide": 0, "includeAll": true, - "label": "Replica set", + "label": "Instance", "multi": true, - "name": "rs", - "options": [ ], - "query": "label_values(mongodb_network_bytesIn{cl_name=~\"$cl_name\"},rs_nm)", + "name": "instance", + "query": "label_values(mongodb_network_bytesIn{job=\"integrations/mongodb-atlas\",job=~\"$job\",cl_name=~\"$cl_name\"}, instance)", "refresh": 2, - "regex": "", - "sort": 0, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false + "sort": 1, + "type": "query" }, { - "allValue": ".+", - "current": { }, "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, - "hide": 0, "includeAll": true, - "label": "Node", + "label": "Replica set", "multi": true, - "name": "instance", - "options": [ ], - "query": "label_values(mongodb_network_bytesIn{rs_nm=~\"$rs\"},instance)", + "name": "rs_nm", + "query": "label_values(mongodb_network_bytesIn, rs_nm)", "refresh": 2, - "regex": "", - "sort": 0, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false + "type": "query" } ] }, @@ -1091,33 +720,7 @@ "from": "now-30m", "to": "now" }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, "timezone": "default", - "title": "MongoDB Atlas election overview", - "uid": "mongodb-atlas-elections-overview", - "version": 0 + "title": "MongoDB Atlas 
elections overview", + "uid": "mongodb-atlas-elections-overview" } \ No newline at end of file diff --git a/mongodb-atlas-mixin/dashboards_out/mongodb-atlas-operations-overview.json b/mongodb-atlas-mixin/dashboards_out/mongodb-atlas-operations-overview.json index b1c6bdb91..88a801f31 100644 --- a/mongodb-atlas-mixin/dashboards_out/mongodb-atlas-operations-overview.json +++ b/mongodb-atlas-mixin/dashboards_out/mongodb-atlas-operations-overview.json @@ -1,290 +1,208 @@ { - "__inputs": [ ], - "__requires": [ ], "annotations": { "list": [ ] }, - "description": "", - "editable": false, - "gnetId": null, - "graphTooltip": 0, - "hideControls": false, - "id": null, + "description": "Overview of MongoDB Atlas operation metrics.", "links": [ { - "asDropdown": false, - "icon": "external link", - "includeVars": true, "keepTime": true, - "tags": [ - "mongodb-atlas-mixin" - ], - "targetBlank": false, - "title": "MongoDB Atlas dashboards", - "type": "dashboards", - "url": "" + "title": "MongoDB Atlas cluster overview", + "type": "link", + "url": "/d/mongodb-atlas-cluster-overview" + }, + { + "keepTime": true, + "title": "MongoDB Atlas elections overview", + "type": "link", + "url": "/d/mongodb-atlas-elections-overview" + }, + { + "keepTime": true, + "title": "MongoDB Atlas performance overview", + "type": "link", + "url": "/d/mongodb-atlas-performance-overview" + }, + { + "keepTime": true, + "title": "MongoDB Atlas sharding overview", + "type": "link", + "url": "/d/mongodb-atlas-sharding-overview" } ], "panels": [ + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 0, + "x": 0, + "y": 0 + }, + "id": 1, + "panels": [ ], + "title": "Operation counters - instance", + "type": "row" + }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, - "description": "The rate of query operations the node has received.", + "description": "The rate of insert operations the node has received.", "fieldConfig": { "defaults": { - "color": { - 
"mode": "palette-classic" - }, "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, + "fillOpacity": 30, + "gradientMode": "opacity", + "lineInterpolation": "smooth", + "lineWidth": 2, "showPoints": "never", - "spanNulls": false, "stacking": { - "group": "A", "mode": "normal" - }, - "thresholdsStyle": { - "mode": "off" } }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] - }, "unit": "ops" - }, - "overrides": [ ] + } }, "gridPos": { "h": 8, "w": 12, "x": 0, - "y": 0 + "y": 1 }, "id": 2, "options": { "legend": { "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true + "displayMode": "list" }, "tooltip": { "mode": "multi", "sort": "desc" } }, + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, - "expr": "rate(mongodb_opcounters_query{job=~\"$job\",cl_name=~\"$cl_name\",rs_nm=~\"$rs\",instance=~\"$instance\"}[$__rate_interval])", + "expr": "rate(mongodb_opcounters_insert{job=\"integrations/mongodb-atlas\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\", rs_nm=~\"$rs_nm\"}[$__rate_interval])", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}}" + "instant": false, + "legendFormat": "{{instance}}", + "refId": "Insert operations by instance" } ], - "title": "Query operations", + "title": "Insert operations", "type": "timeseries" }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, - "description": "The rate of insert 
operations the node has received.", + "description": "The rate of query operations the node has received.", "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, + "fillOpacity": 30, + "gradientMode": "opacity", + "lineInterpolation": "smooth", + "lineWidth": 2, "showPoints": "never", - "spanNulls": false, "stacking": { - "group": "A", "mode": "normal" - }, - "thresholdsStyle": { - "mode": "off" } }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] - }, "unit": "ops" - }, - "overrides": [ ] + } }, "gridPos": { "h": 8, "w": 12, "x": 12, - "y": 0 + "y": 1 }, "id": 3, "options": { "legend": { "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true + "displayMode": "list" }, "tooltip": { "mode": "multi", "sort": "desc" } }, + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, - "expr": "rate(mongodb_opcounters_insert{job=~\"$job\",cl_name=~\"$cl_name\",rs_nm=~\"$rs\",instance=~\"$instance\"}[$__rate_interval])", + "expr": "rate(mongodb_opcounters_query{job=\"integrations/mongodb-atlas\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\", rs_nm=~\"$rs_nm\"}[$__rate_interval])", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}}" + "instant": false, + "legendFormat": "{{instance}}", + "refId": "Query operations by instance" } ], - "title": "Insert operations", + "title": "Query operations", "type": 
"timeseries" }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, "description": "The rate of update operations this node has received.", "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, + "fillOpacity": 30, + "gradientMode": "opacity", + "lineInterpolation": "smooth", + "lineWidth": 2, "showPoints": "never", - "spanNulls": false, "stacking": { - "group": "A", "mode": "normal" - }, - "thresholdsStyle": { - "mode": "off" } }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] - }, "unit": "ops" - }, - "overrides": [ ] + } }, "gridPos": { "h": 8, "w": 12, "x": 0, - "y": 8 + "y": 9 }, "id": 4, "options": { "legend": { "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true + "displayMode": "list" }, "tooltip": { "mode": "multi", "sort": "desc" } }, + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, - "expr": "rate(mongodb_opcounters_update{job=~\"$job\",cl_name=~\"$cl_name\",rs_nm=~\"$rs\",instance=~\"$instance\"}[$__rate_interval])", + "expr": "rate(mongodb_opcounters_update{job=\"integrations/mongodb-atlas\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\", rs_nm=~\"$rs_nm\"}[$__rate_interval])", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}}" + "instant": false, + "legendFormat": "{{instance}}", + "refId": "Update operations" } ], 
"title": "Update operations", @@ -292,175 +210,119 @@ }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, "description": "The rate of delete operations this node has received.", "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, + "fillOpacity": 30, + "gradientMode": "opacity", + "lineInterpolation": "smooth", + "lineWidth": 2, "showPoints": "never", - "spanNulls": false, "stacking": { - "group": "A", "mode": "normal" - }, - "thresholdsStyle": { - "mode": "off" } }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] - }, "unit": "ops" - }, - "overrides": [ ] + } }, "gridPos": { "h": 8, "w": 12, "x": 12, - "y": 8 + "y": 9 }, "id": 5, "options": { "legend": { "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true + "displayMode": "list" }, "tooltip": { "mode": "multi", "sort": "desc" } }, + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, - "expr": "rate(mongodb_opcounters_delete{job=~\"$job\",cl_name=~\"$cl_name\",rs_nm=~\"$rs\",instance=~\"$instance\"}[$__rate_interval])", + "expr": "rate(mongodb_opcounters_delete{job=\"integrations/mongodb-atlas\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\", rs_nm=~\"$rs_nm\"}[$__rate_interval])", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}}" + "instant": false, + "legendFormat": 
"{{instance}}", + "refId": "Delete operations by instance" } ], "title": "Delete operations", "type": "timeseries" }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 0, + "x": 0, + "y": 17 + }, + "id": 6, + "panels": [ ], + "title": "Connections", + "type": "row" + }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, "description": "The number of incoming connections from clients to the node.", "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] + "fillOpacity": 30, + "gradientMode": "opacity", + "lineInterpolation": "smooth", + "lineWidth": 2, + "showPoints": "never" }, "unit": "none" - }, - "overrides": [ ] + } }, "gridPos": { "h": 8, "w": 12, "x": 0, - "y": 16 + "y": 18 }, - "id": 6, + "id": 7, "options": { "legend": { "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true + "displayMode": "list" }, "tooltip": { "mode": "multi", "sort": "desc" } }, + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, - "expr": "mongodb_connections_current{job=~\"$job\",cl_name=~\"$cl_name\",rs_nm=~\"$rs\",instance=~\"$instance\"}", + "expr": 
"mongodb_connections_current{job=\"integrations/mongodb-atlas\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\", rs_nm=~\"$rs_nm\"}", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}}" + "instant": false, + "legendFormat": "{{instance}}", + "refId": "Current connections" } ], "title": "Current connections", @@ -468,183 +330,130 @@ }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, "description": "The number of connections that currently have operations in progress.", "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] + "fillOpacity": 30, + "gradientMode": "opacity", + "lineInterpolation": "smooth", + "lineWidth": 2, + "showPoints": "never" }, "unit": "none" - }, - "overrides": [ ] + } }, "gridPos": { "h": 8, "w": 12, "x": 12, - "y": 16 + "y": 18 }, - "id": 7, + "id": 8, "options": { "legend": { "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true + "displayMode": "list" }, "tooltip": { "mode": "multi", "sort": "desc" } }, + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, - "expr": 
"mongodb_connections_active{job=~\"$job\",cl_name=~\"$cl_name\",rs_nm=~\"$rs\",instance=~\"$instance\"}", + "expr": "mongodb_connections_active{job=\"integrations/mongodb-atlas\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\", rs_nm=~\"$rs_nm\"}", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}}" + "instant": false, + "legendFormat": "{{instance}}", + "refId": "Active connections" } ], "title": "Active connections", "type": "timeseries" }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 0, + "x": 0, + "y": 26 + }, + "id": 9, + "panels": [ ], + "title": "Read/Write operations", + "type": "row" + }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, "description": "The rate of read and write operations performed by the node.", "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, + "fillOpacity": 30, + "gradientMode": "opacity", + "lineInterpolation": "smooth", + "lineWidth": 2, "showPoints": "never", - "spanNulls": false, "stacking": { - "group": "A", "mode": "normal" - }, - "thresholdsStyle": { - "mode": "off" } }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - }, "unit": "ops" - }, - "overrides": [ ] + } }, "gridPos": { "h": 8, "w": 12, "x": 0, - "y": 24 + "y": 27 }, - "id": 8, + "id": 10, "options": { "legend": { "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true + "displayMode": "list" }, "tooltip": { "mode": "multi", "sort": 
"desc" } }, + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, - "expr": "rate(mongodb_opLatencies_reads_ops{job=~\"$job\",cl_name=~\"$cl_name\",rs_nm=~\"$rs\",instance=~\"$instance\"}[$__rate_interval])", + "expr": "rate(mongodb_opLatencies_reads_ops{job=\"integrations/mongodb-atlas\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\", rs_nm=~\"$rs_nm\"}[$__rate_interval])", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}} - reads" + "instant": false, + "legendFormat": "{{instance}} - reads", + "refId": "Read operations by instance" }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, - "expr": "rate(mongodb_opLatencies_writes_ops{job=~\"$job\",cl_name=~\"$cl_name\",rs_nm=~\"$rs\",instance=~\"$instance\"}[$__rate_interval])", + "expr": "rate(mongodb_opLatencies_writes_ops{job=\"integrations/mongodb-atlas\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\", rs_nm=~\"$rs_nm\"}[$__rate_interval])", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}} - writes" + "instant": false, + "legendFormat": "{{instance}} - writes", + "refId": "Write operations by instance" } ], "title": "Read and write operations", @@ -652,240 +461,172 @@ }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, "description": "The latency time for read and write operations performed by this node.", "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - 
"lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, + "fillOpacity": 30, + "gradientMode": "opacity", + "lineInterpolation": "smooth", + "lineWidth": 2, "showPoints": "never", - "spanNulls": false, "stacking": { - "group": "A", "mode": "normal" - }, - "thresholdsStyle": { - "mode": "off" } }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - }, "unit": "µs" - }, - "overrides": [ ] + } }, "gridPos": { "h": 8, "w": 12, "x": 12, - "y": 24 + "y": 27 }, - "id": 9, + "id": 11, "options": { "legend": { "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true + "displayMode": "list" }, "tooltip": { "mode": "multi", "sort": "desc" } }, + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, - "expr": "increase(mongodb_opLatencies_reads_latency{job=~\"$job\",cl_name=~\"$cl_name\",rs_nm=~\"$rs\",instance=~\"$instance\"}[$__interval:])", + "expr": "increase(mongodb_opLatencies_reads_latency{job=\"integrations/mongodb-atlas\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\", rs_nm=~\"$rs_nm\"}[$__interval:] offset -$__interval)", "format": "time_series", - "interval": "1m", - "intervalFactor": 2, - "legendFormat": "{{instance}} - reads" + "instant": false, + "interval": "2m", + "legendFormat": "{{instance}} - reads", + "refId": "Read latency by instance" }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, - "expr": "increase(mongodb_opLatencies_writes_latency{job=~\"$job\",cl_name=~\"$cl_name\",rs_nm=~\"$rs\",instance=~\"$instance\"}[$__interval:])", + "expr": "increase(mongodb_opLatencies_writes_latency{job=\"integrations/mongodb-atlas\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\", rs_nm=~\"$rs_nm\"}[$__interval:] offset -$__interval)", "format": "time_series", - "interval": "1m", - 
"intervalFactor": 2, - "legendFormat": "{{instance}} - writes" + "instant": false, + "interval": "2m", + "legendFormat": "{{instance}} - writes", + "refId": "Write latency by instance" } ], "title": "Read and write latency / $__interval", "type": "timeseries" }, { - "datasource": { - "uid": "${prometheus_datasource}" - }, + "collapsed": false, "gridPos": { "h": 1, - "w": 24, + "w": 0, "x": 0, - "y": 32 + "y": 35 }, - "id": 10, - "targets": [ - { - "datasource": { - "uid": "${prometheus_datasource}" - }, - "expr": "", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "" - } - ], + "id": 12, + "panels": [ ], "title": "Locks", "type": "row" }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, "description": "The number of deadlocks that have occurred for the database lock.", "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, + "fillOpacity": 30, + "gradientMode": "opacity", + "lineInterpolation": "smooth", + "lineWidth": 2, "showPoints": "never", - "spanNulls": false, "stacking": { - "group": "A", "mode": "normal" - }, - "thresholdsStyle": { - "mode": "off" } }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - }, "unit": "none" - }, - "overrides": [ ] + } }, "gridPos": { "h": 8, "w": 8, "x": 0, - "y": 33 + "y": 36 }, - "id": 11, + "id": 13, "options": { "legend": { "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true + "displayMode": "list" }, "tooltip": { "mode": 
"multi", "sort": "desc" } }, + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, - "expr": "increase(mongodb_locks_Database_deadlockCount_W{job=~\"$job\",cl_name=~\"$cl_name\",rs_nm=~\"$rs\",instance=~\"$instance\"}[$__interval:])", + "expr": "increase(mongodb_locks_Database_deadlockCount_W{job=\"integrations/mongodb-atlas\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\", rs_nm=~\"$rs_nm\"}[$__interval:] offset -$__interval)", "format": "time_series", - "interval": "1m", - "intervalFactor": 2, - "legendFormat": "{{instance}} - exclusive" + "instant": false, + "interval": "2m", + "legendFormat": "{{instance}} - exclusive", + "refId": "Database exclusive lock deadlocks by instance" }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, - "expr": "increase(mongodb_locks_Database_deadlockCount_w{job=~\"$job\",cl_name=~\"$cl_name\",rs_nm=~\"$rs\",instance=~\"$instance\"}[$__interval:])", + "expr": "increase(mongodb_locks_Database_deadlockCount_w{job=\"integrations/mongodb-atlas\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\", rs_nm=~\"$rs_nm\"}[$__interval:] offset -$__interval)", "format": "time_series", - "interval": "1m", - "intervalFactor": 2, - "legendFormat": "{{instance}} - intent exclusive" + "instant": false, + "interval": "2m", + "legendFormat": "{{instance}} - intent exclusive", + "refId": "Database intent exclusive lock deadlocks by instance" }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, - "expr": "increase(mongodb_locks_Database_deadlockCount_R{job=~\"$job\",cl_name=~\"$cl_name\",rs_nm=~\"$rs\",instance=~\"$instance\"}[$__interval:])", + "expr": "increase(mongodb_locks_Database_deadlockCount_R{job=\"integrations/mongodb-atlas\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\", rs_nm=~\"$rs_nm\"}[$__interval:] 
offset -$__interval)", "format": "time_series", - "interval": "1m", - "intervalFactor": 2, - "legendFormat": "{{instance}} - shared" + "instant": false, + "interval": "2m", + "legendFormat": "{{instance}} - shared", + "refId": "Database shared lock deadlocks by instance" }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, - "expr": "increase(mongodb_locks_Database_deadlockCount_r{job=~\"$job\",cl_name=~\"$cl_name\",rs_nm=~\"$rs\",instance=~\"$instance\"}[$__interval:])", + "expr": "increase(mongodb_locks_Database_deadlockCount_r{job=\"integrations/mongodb-atlas\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\", rs_nm=~\"$rs_nm\"}[$__interval:] offset -$__interval)", "format": "time_series", - "interval": "1m", - "intervalFactor": 2, - "legendFormat": "{{instance}} - intent shared" + "instant": false, + "interval": "2m", + "legendFormat": "{{instance}} - intent shared", + "refId": "Database intent shared lock deadlocks by instance" } ], "title": "Database deadlocks / $__interval", @@ -893,117 +634,91 @@ }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, "description": "The number of database lock acquisitions that had to wait.", "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, + "fillOpacity": 30, + "gradientMode": "opacity", + "lineInterpolation": "smooth", + "lineWidth": 2, "showPoints": "never", - "spanNulls": false, "stacking": { - "group": "A", "mode": "normal" - }, - "thresholdsStyle": { - 
"mode": "off" } }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - }, "unit": "none" - }, - "overrides": [ ] + } }, "gridPos": { "h": 8, "w": 8, "x": 8, - "y": 33 + "y": 36 }, - "id": 12, + "id": 14, "options": { "legend": { "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true + "displayMode": "list" }, "tooltip": { "mode": "multi", "sort": "desc" } }, + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, - "expr": "increase(mongodb_locks_Database_acquireWaitCount_W{job=~\"$job\",cl_name=~\"$cl_name\",rs_nm=~\"$rs\",instance=~\"$instance\"}[$__interval:])", + "expr": "increase(mongodb_locks_Database_acquireWaitCount_W{job=\"integrations/mongodb-atlas\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\", rs_nm=~\"$rs_nm\"}[$__interval:] offset -$__interval)", "format": "time_series", - "interval": "1m", - "intervalFactor": 2, - "legendFormat": "{{instance}} - exclusive" + "instant": false, + "interval": "2m", + "legendFormat": "{{instance}} - exclusive", + "refId": "Database exclusive lock wait count by instance" }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, - "expr": "increase(mongodb_locks_Database_acquireWaitCount_w{job=~\"$job\",cl_name=~\"$cl_name\",rs_nm=~\"$rs\",instance=~\"$instance\"}[$__interval:])", + "expr": "increase(mongodb_locks_Database_acquireWaitCount_w{job=\"integrations/mongodb-atlas\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\", rs_nm=~\"$rs_nm\"}[$__interval:] offset -$__interval)", "format": "time_series", - "interval": "1m", - "intervalFactor": 2, - "legendFormat": "{{instance}} - intent exclusive" + "instant": false, + "interval": "2m", + "legendFormat": "{{instance}} - intent exclusive", + "refId": "Database intent exclusive lock wait count by instance" }, { "datasource": { 
- "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, - "expr": "increase(mongodb_locks_Database_acquireWaitCount_R{job=~\"$job\",cl_name=~\"$cl_name\",rs_nm=~\"$rs\",instance=~\"$instance\"}[$__interval:])", + "expr": "increase(mongodb_locks_Database_acquireWaitCount_R{job=\"integrations/mongodb-atlas\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\", rs_nm=~\"$rs_nm\"}[$__interval:] offset -$__interval)", "format": "time_series", - "interval": "1m", - "intervalFactor": 2, - "legendFormat": "{{instance}} - shared" + "instant": false, + "interval": "2m", + "legendFormat": "{{instance}} - shared", + "refId": "Database shared lock wait count by instance" }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, - "expr": "increase(mongodb_locks_Database_acquireWaitCount_r{job=~\"$job\",cl_name=~\"$cl_name\",rs_nm=~\"$rs\",instance=~\"$instance\"}[$__interval:])", + "expr": "increase(mongodb_locks_Database_acquireWaitCount_r{job=\"integrations/mongodb-atlas\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\", rs_nm=~\"$rs_nm\"}[$__interval:] offset -$__interval)", "format": "time_series", - "interval": "1m", - "intervalFactor": 2, - "legendFormat": "{{instance}} - intent shared" + "instant": false, + "interval": "2m", + "legendFormat": "{{instance}} - intent shared", + "refId": "Database intent shared lock wait count by instance" } ], "title": "Database wait count / $__interval", @@ -1011,117 +726,91 @@ }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, "description": "The time spent waiting for the database lock acquisition.", "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - 
"hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, + "fillOpacity": 30, + "gradientMode": "opacity", + "lineInterpolation": "smooth", + "lineWidth": 2, "showPoints": "never", - "spanNulls": false, "stacking": { - "group": "A", "mode": "normal" - }, - "thresholdsStyle": { - "mode": "off" } }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - }, "unit": "µs" - }, - "overrides": [ ] + } }, "gridPos": { "h": 8, "w": 8, "x": 16, - "y": 33 + "y": 36 }, - "id": 13, + "id": 15, "options": { "legend": { "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true + "displayMode": "list" }, "tooltip": { "mode": "multi", "sort": "desc" } }, + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, - "expr": "increase(mongodb_locks_Database_timeAcquiringMicros_W{job=~\"$job\",cl_name=~\"$cl_name\",rs_nm=~\"$rs\",instance=~\"$instance\"}[$__interval:])", + "expr": "increase(mongodb_locks_Database_timeAcquiringMicros_W{job=\"integrations/mongodb-atlas\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\", rs_nm=~\"$rs_nm\"}[$__interval:] offset -$__interval)", "format": "time_series", - "interval": "1m", - "intervalFactor": 2, - "legendFormat": "{{instance}} - exclusive" + "instant": false, + "interval": "2m", + "legendFormat": "{{instance}} - exclusive", + "refId": "Database exclusive lock acquisition time by instance" }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, - "expr": "increase(mongodb_locks_Database_timeAcquiringMicros_w{job=~\"$job\",cl_name=~\"$cl_name\",rs_nm=~\"$rs\",instance=~\"$instance\"}[$__interval:])", + "expr": 
"increase(mongodb_locks_Database_timeAcquiringMicros_w{job=\"integrations/mongodb-atlas\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\", rs_nm=~\"$rs_nm\"}[$__interval:] offset -$__interval)", "format": "time_series", - "interval": "1m", - "intervalFactor": 2, - "legendFormat": "{{instance}} - intent exclusive" + "instant": false, + "interval": "2m", + "legendFormat": "{{instance}} - intent exclusive", + "refId": "Database intent exclusive lock acquisition time by instance" }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, - "expr": "increase(mongodb_locks_Database_timeAcquiringMicros_R{job=~\"$job\",cl_name=~\"$cl_name\",rs_nm=~\"$rs\",instance=~\"$instance\"}[$__interval:])", + "expr": "increase(mongodb_locks_Database_timeAcquiringMicros_R{job=\"integrations/mongodb-atlas\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\", rs_nm=~\"$rs_nm\"}[$__interval:] offset -$__interval)", "format": "time_series", - "interval": "1m", - "intervalFactor": 2, - "legendFormat": "{{instance}} - shared" + "instant": false, + "interval": "2m", + "legendFormat": "{{instance}} - shared", + "refId": "Database shared lock acquisition time by instance" }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, - "expr": "increase(mongodb_locks_Database_timeAcquiringMicros_r{job=~\"$job\",cl_name=~\"$cl_name\",rs_nm=~\"$rs\",instance=~\"$instance\"}[$__interval:])", + "expr": "increase(mongodb_locks_Database_timeAcquiringMicros_r{job=\"integrations/mongodb-atlas\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\", rs_nm=~\"$rs_nm\"}[$__interval:] offset -$__interval)", "format": "time_series", - "interval": "1m", - "intervalFactor": 2, - "legendFormat": "{{instance}} - intent shared" + "instant": false, + "interval": "2m", + "legendFormat": "{{instance}} - intent shared", + "refId": "Database intent shared lock acquisition time by instance" } ], 
"title": "Database wait time / $__interval", @@ -1129,117 +818,91 @@ }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, "description": "The number of deadlocks that have occurred for the collection lock.", "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, + "fillOpacity": 30, + "gradientMode": "opacity", + "lineInterpolation": "smooth", + "lineWidth": 2, "showPoints": "never", - "spanNulls": false, "stacking": { - "group": "A", "mode": "normal" - }, - "thresholdsStyle": { - "mode": "off" } }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - }, "unit": "none" - }, - "overrides": [ ] + } }, "gridPos": { "h": 8, "w": 8, "x": 0, - "y": 41 + "y": 44 }, - "id": 14, + "id": 16, "options": { "legend": { "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true + "displayMode": "list" }, "tooltip": { "mode": "multi", "sort": "desc" } }, + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, - "expr": "increase(mongodb_locks_Collection_deadlockCount_W{job=~\"$job\",cl_name=~\"$cl_name\",rs_nm=~\"$rs\",instance=~\"$instance\"}[$__interval:])", + "expr": "increase(mongodb_locks_Collection_deadlockCount_W{job=\"integrations/mongodb-atlas\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\", rs_nm=~\"$rs_nm\"}[$__interval:] offset -$__interval)", "format": "time_series", - "interval": "1m", - 
"intervalFactor": 2, - "legendFormat": "{{instance}} - exclusive" + "instant": false, + "interval": "2m", + "legendFormat": "{{instance}} - exclusive", + "refId": "Collection exclusive lock deadlocks by instance" }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, - "expr": "increase(mongodb_locks_Collection_deadlockCount_w{job=~\"$job\",cl_name=~\"$cl_name\",rs_nm=~\"$rs\",instance=~\"$instance\"}[$__interval:])", + "expr": "increase(mongodb_locks_Collection_deadlockCount_w{job=\"integrations/mongodb-atlas\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\", rs_nm=~\"$rs_nm\"}[$__interval:] offset -$__interval)", "format": "time_series", - "interval": "1m", - "intervalFactor": 2, - "legendFormat": "{{instance}} - intent exclusive" + "instant": false, + "interval": "2m", + "legendFormat": "{{instance}} - intent exclusive", + "refId": "Collection intent exclusive lock deadlocks by instance" }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, - "expr": "increase(mongodb_locks_Collection_deadlockCount_R{job=~\"$job\",cl_name=~\"$cl_name\",rs_nm=~\"$rs\",instance=~\"$instance\"}[$__interval:])", + "expr": "increase(mongodb_locks_Collection_deadlockCount_R{job=\"integrations/mongodb-atlas\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\", rs_nm=~\"$rs_nm\"}[$__interval:] offset -$__interval)", "format": "time_series", - "interval": "1m", - "intervalFactor": 2, - "legendFormat": "{{instance}} - shared" + "instant": false, + "interval": "2m", + "legendFormat": "{{instance}} - shared", + "refId": "Collection shared lock deadlocks by instance" }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, - "expr": "increase(mongodb_locks_Collection_deadlockCount_r{job=~\"$job\",cl_name=~\"$cl_name\",rs_nm=~\"$rs\",instance=~\"$instance\"}[$__interval:])", + "expr": 
"increase(mongodb_locks_Collection_deadlockCount_r{job=\"integrations/mongodb-atlas\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\", rs_nm=~\"$rs_nm\"}[$__interval:] offset -$__interval)", "format": "time_series", - "interval": "1m", - "intervalFactor": 2, - "legendFormat": "{{instance}} - intent shared" + "instant": false, + "interval": "2m", + "legendFormat": "{{instance}} - intent shared", + "refId": "Collection intent shared lock deadlocks by instance" } ], "title": "Collection deadlocks / $__interval", @@ -1247,117 +910,91 @@ }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, "description": "The number of collection lock acquisitions that had to wait.", "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, + "fillOpacity": 30, + "gradientMode": "opacity", + "lineInterpolation": "smooth", + "lineWidth": 2, "showPoints": "never", - "spanNulls": false, "stacking": { - "group": "A", "mode": "normal" - }, - "thresholdsStyle": { - "mode": "off" } }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - }, "unit": "none" - }, - "overrides": [ ] + } }, "gridPos": { "h": 8, "w": 8, "x": 8, - "y": 41 + "y": 44 }, - "id": 15, + "id": 17, "options": { "legend": { "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true + "displayMode": "list" }, "tooltip": { "mode": "multi", "sort": "desc" } }, + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { - "uid": 
"${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, - "expr": "increase(mongodb_locks_Collection_acquireWaitCount_W{job=~\"$job\",cl_name=~\"$cl_name\",rs_nm=~\"$rs\",instance=~\"$instance\"}[$__interval:])", + "expr": "increase(mongodb_locks_Collection_acquireWaitCount_W{job=\"integrations/mongodb-atlas\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\", rs_nm=~\"$rs_nm\"}[$__interval:] offset -$__interval)", "format": "time_series", - "interval": "1m", - "intervalFactor": 2, - "legendFormat": "{{instance}} - exclusive" + "instant": false, + "interval": "2m", + "legendFormat": "{{instance}} - exclusive", + "refId": "Collection exclusive lock wait count by instance" }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, - "expr": "increase(mongodb_locks_Collection_acquireWaitCount_w{job=~\"$job\",cl_name=~\"$cl_name\",rs_nm=~\"$rs\",instance=~\"$instance\"}[$__interval:])", + "expr": "increase(mongodb_locks_Collection_acquireWaitCount_w{job=\"integrations/mongodb-atlas\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\", rs_nm=~\"$rs_nm\"}[$__interval:] offset -$__interval)", "format": "time_series", - "interval": "1m", - "intervalFactor": 2, - "legendFormat": "{{instance}} - intent exclusive" + "instant": false, + "interval": "2m", + "legendFormat": "{{instance}} - intent exclusive", + "refId": "Collection intent exclusive lock wait count by instance" }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, - "expr": "increase(mongodb_locks_Collection_acquireWaitCount_R{job=~\"$job\",cl_name=~\"$cl_name\",rs_nm=~\"$rs\",instance=~\"$instance\"}[$__interval:])", + "expr": "increase(mongodb_locks_Collection_acquireWaitCount_R{job=\"integrations/mongodb-atlas\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\", rs_nm=~\"$rs_nm\"}[$__interval:] offset -$__interval)", "format": "time_series", - "interval": 
"1m", - "intervalFactor": 2, - "legendFormat": "{{instance}} - shared" + "instant": false, + "interval": "2m", + "legendFormat": "{{instance}} - shared", + "refId": "Collection shared lock wait count by instance" }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, - "expr": "increase(mongodb_locks_Collection_acquireWaitCount_r{job=~\"$job\",cl_name=~\"$cl_name\",rs_nm=~\"$rs\",instance=~\"$instance\"}[$__interval:])", + "expr": "increase(mongodb_locks_Collection_acquireWaitCount_r{job=\"integrations/mongodb-atlas\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\", rs_nm=~\"$rs_nm\"}[$__interval:] offset -$__interval)", "format": "time_series", - "interval": "1m", - "intervalFactor": 2, - "legendFormat": "{{instance}} - intent shared" + "instant": false, + "interval": "2m", + "legendFormat": "{{instance}} - intent shared", + "refId": "Collection intent shared lock wait count by instance" } ], "title": "Collection wait count / $__interval", @@ -1365,117 +1002,91 @@ }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, "description": "The time spent waiting for the collection lock acquisition.", "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, + "fillOpacity": 30, + "gradientMode": "opacity", + "lineInterpolation": "smooth", + "lineWidth": 2, "showPoints": "never", - "spanNulls": false, "stacking": { - "group": "A", "mode": "normal" - }, - "thresholdsStyle": { - "mode": "off" } }, - "mappings": [ ], - 
"thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - }, "unit": "µs" - }, - "overrides": [ ] + } }, "gridPos": { "h": 8, "w": 8, "x": 16, - "y": 41 + "y": 44 }, - "id": 16, + "id": 18, "options": { "legend": { "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true + "displayMode": "list" }, "tooltip": { "mode": "multi", "sort": "desc" } }, + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, - "expr": "increase(mongodb_locks_Collection_timeAcquiringMicros_W{job=~\"$job\",cl_name=~\"$cl_name\",rs_nm=~\"$rs\",instance=~\"$instance\"}[$__interval:])", + "expr": "increase(mongodb_locks_Collection_timeAcquiringMicros_W{job=\"integrations/mongodb-atlas\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\", rs_nm=~\"$rs_nm\"}[$__interval:] offset -$__interval)", "format": "time_series", - "interval": "1m", - "intervalFactor": 2, - "legendFormat": "{{instance}} - exclusive" + "instant": false, + "interval": "2m", + "legendFormat": "{{instance}} - exclusive", + "refId": "Collection exclusive lock acquisition time by instance" }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, - "expr": "increase(mongodb_locks_Collection_timeAcquiringMicros_w{job=~\"$job\",cl_name=~\"$cl_name\",rs_nm=~\"$rs\",instance=~\"$instance\"}[$__interval:])", + "expr": "increase(mongodb_locks_Collection_timeAcquiringMicros_w{job=\"integrations/mongodb-atlas\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\", rs_nm=~\"$rs_nm\"}[$__interval:] offset -$__interval)", "format": "time_series", - "interval": "1m", - "intervalFactor": 2, - "legendFormat": "{{instance}} - intent exclusive" + "instant": false, + "interval": "2m", + "legendFormat": "{{instance}} - intent exclusive", + "refId": "Collection intent exclusive lock acquisition time by instance" }, { "datasource": { - 
"uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, - "expr": "increase(mongodb_locks_Collection_timeAcquiringMicros_R{job=~\"$job\",cl_name=~\"$cl_name\",rs_nm=~\"$rs\",instance=~\"$instance\"}[$__interval:])", + "expr": "increase(mongodb_locks_Collection_timeAcquiringMicros_R{job=\"integrations/mongodb-atlas\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\", rs_nm=~\"$rs_nm\"}[$__interval:] offset -$__interval)", "format": "time_series", - "interval": "1m", - "intervalFactor": 2, - "legendFormat": "{{instance}} - shared" + "instant": false, + "interval": "2m", + "legendFormat": "{{instance}} - shared", + "refId": "Collection shared lock acquisition time by instance" }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, - "expr": "increase(mongodb_locks_Collection_timeAcquiringMicros_r{job=~\"$job\",cl_name=~\"$cl_name\",rs_nm=~\"$rs\",instance=~\"$instance\"}[$__interval:])", + "expr": "increase(mongodb_locks_Collection_timeAcquiringMicros_r{job=\"integrations/mongodb-atlas\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\", rs_nm=~\"$rs_nm\"}[$__interval:] offset -$__interval)", "format": "time_series", - "interval": "1m", - "intervalFactor": 2, - "legendFormat": "{{instance}} - intent shared" + "instant": false, + "interval": "2m", + "legendFormat": "{{instance}} - intent shared", + "refId": "Collection intent shared lock acquisition time by instance" } ], "title": "Collection wait time / $__interval", @@ -1483,112 +1094,76 @@ } ], "refresh": "1m", - "rows": [ ], - "schemaVersion": 14, - "style": "dark", + "schemaVersion": 39, "tags": [ "mongodb-atlas-mixin" ], "templating": { "list": [ { - "current": { }, - "hide": 0, - "label": "Data Source", - "name": "prometheus_datasource", - "options": [ ], + "label": "Data source", + "name": "datasource", "query": "prometheus", - "refresh": 1, "regex": "", "type": "datasource" }, { - "allValue": null, - 
"current": { }, + "allValue": ".+", "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, - "hide": 0, - "includeAll": false, + "includeAll": true, "label": "Job", "multi": true, "name": "job", - "options": [ ], - "query": "label_values(mongodb_network_bytesIn,job)", + "query": "label_values(mongodb_network_bytesIn{job=\"integrations/mongodb-atlas\"}, job)", "refresh": 2, - "regex": "", - "sort": 0, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false + "sort": 1, + "type": "query" }, { "allValue": ".+", - "current": { }, "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, - "hide": 0, "includeAll": true, "label": "Atlas cluster", "multi": true, "name": "cl_name", - "options": [ ], - "query": "label_values(mongodb_network_bytesIn{job=~\"$job\"},cl_name)", + "query": "label_values(mongodb_network_bytesIn{job=\"integrations/mongodb-atlas\",job=~\"$job\"}, cl_name)", "refresh": 2, - "regex": "", - "sort": 0, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false + "sort": 1, + "type": "query" }, { "allValue": ".+", - "current": { }, "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, - "hide": 0, "includeAll": true, - "label": "Replica set", + "label": "Instance", "multi": true, - "name": "rs", - "options": [ ], - "query": "label_values(mongodb_network_bytesIn{cl_name=~\"$cl_name\"},rs_nm)", + "name": "instance", + "query": "label_values(mongodb_network_bytesIn{job=\"integrations/mongodb-atlas\",job=~\"$job\",cl_name=~\"$cl_name\"}, instance)", "refresh": 2, - "regex": "", - "sort": 0, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false + "sort": 1, + "type": "query" }, { - "allValue": ".+", - "current": { }, "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + 
"uid": "${datasource}" }, - "hide": 0, "includeAll": true, - "label": "Node", + "label": "Replica set", "multi": true, - "name": "instance", - "options": [ ], - "query": "label_values(mongodb_network_bytesIn{rs_nm=~\"$rs\"},instance)", + "name": "rs_nm", + "query": "label_values(mongodb_network_bytesIn, rs_nm)", "refresh": 2, - "regex": "", - "sort": 0, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false + "type": "query" } ] }, @@ -1596,33 +1171,7 @@ "from": "now-30m", "to": "now" }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, "timezone": "default", "title": "MongoDB Atlas operations overview", - "uid": "mongodb-atlas-operations-overview", - "version": 0 + "uid": "mongodb-atlas-operations-overview" } \ No newline at end of file diff --git a/mongodb-atlas-mixin/dashboards_out/mongodb-atlas-performance-overview.json b/mongodb-atlas-mixin/dashboards_out/mongodb-atlas-performance-overview.json index f722decee..da714c395 100644 --- a/mongodb-atlas-mixin/dashboards_out/mongodb-atlas-performance-overview.json +++ b/mongodb-atlas-mixin/dashboards_out/mongodb-atlas-performance-overview.json @@ -1,123 +1,106 @@ { - "__inputs": [ ], - "__requires": [ ], "annotations": { "list": [ ] }, - "description": "", - "editable": false, - "gnetId": null, - "graphTooltip": 0, - "hideControls": false, - "id": null, + "description": "Overview of MongoDB Atlas performance metrics.", "links": [ { - "asDropdown": false, - "icon": "external link", - "includeVars": true, "keepTime": true, - "tags": [ - "mongodb-atlas-mixin" - ], - "targetBlank": false, - "title": "MongoDB Atlas dashboards", - "type": "dashboards", - "url": "" + "title": "MongoDB Atlas cluster overview", + "type": "link", + "url": "/d/mongodb-atlas-cluster-overview" + }, + { + "keepTime": true, + 
"title": "MongoDB Atlas elections overview", + "type": "link", + "url": "/d/mongodb-atlas-elections-overview" + }, + { + "keepTime": true, + "title": "MongoDB Atlas operations overview", + "type": "link", + "url": "/d/mongodb-atlas-operations-overview" + }, + { + "keepTime": true, + "title": "MongoDB Atlas sharding overview", + "type": "link", + "url": "/d/mongodb-atlas-sharding-overview" } ], "panels": [ + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 0, + "x": 0, + "y": 0 + }, + "id": 1, + "panels": [ ], + "title": "Memory and hardware", + "type": "row" + }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, "description": "The amount of RAM and virtual memory being used by the database process.", "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] + "fillOpacity": 30, + "gradientMode": "opacity", + "lineInterpolation": "smooth", + "lineWidth": 2, + "showPoints": "never" }, "unit": "mbytes" - }, - "overrides": [ ] + } }, "gridPos": { "h": 8, "w": 12, "x": 0, - "y": 0 + "y": 1 }, "id": 2, "options": { "legend": { "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true + "displayMode": "list" }, "tooltip": { "mode": "multi", "sort": "desc" } }, + "pluginVersion": 
"v11.0.0", "targets": [ { "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, - "expr": "mongodb_mem_resident{job=~\"$job\",cl_name=~\"$cl_name\",rs_nm=~\"$rs\",instance=~\"$instance\"}", + "expr": "mongodb_mem_resident{job=\"integrations/mongodb-atlas\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\", rs_nm=~\"$rs_nm\"}", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}} - RAM" + "instant": false, + "legendFormat": "{{instance}} - RAM", + "refId": "Memory resident by instance" }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, - "expr": "mongodb_mem_virtual{job=~\"$job\",cl_name=~\"$cl_name\",rs_nm=~\"$rs\",instance=~\"$instance\"}", + "expr": "mongodb_mem_virtual{job=\"integrations/mongodb-atlas\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\", rs_nm=~\"$rs_nm\"}", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}} - virtual" + "instant": false, + "legendFormat": "{{instance}} - virtual", + "refId": "Memory virtual by instance" } ], "title": "Memory", @@ -125,185 +108,128 @@ }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, "description": "The amount of time spent servicing CPU interrupts.", "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": 
"off" - } - }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] + "fillOpacity": 30, + "gradientMode": "opacity", + "lineInterpolation": "smooth", + "lineWidth": 2, + "showPoints": "never" }, "unit": "ms" - }, - "overrides": [ ] + } }, "gridPos": { "h": 8, "w": 12, "x": 12, - "y": 0 + "y": 1 }, "id": 3, "options": { "legend": { "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true + "displayMode": "list" }, "tooltip": { "mode": "multi", "sort": "desc" } }, + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, - "expr": "increase(hardware_system_cpu_irq_milliseconds{job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\"}[$__interval:])", + "expr": "increase(hardware_system_cpu_irq_milliseconds{job=\"integrations/mongodb-atlas\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\"}[$__interval:] offset -$__interval)", "format": "time_series", - "interval": "1m", - "intervalFactor": 2, - "legendFormat": "{{instance}}" + "instant": false, + "interval": "2m", + "legendFormat": "{{instance}}", + "refId": "CPU interrupt service time by instance" } ], "title": "Hardware CPU interrupt service time / $__interval", "type": "timeseries" }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 0, + "x": 0, + "y": 9 + }, + "id": 4, + "panels": [ ], + "title": "Disk", + "type": "row" + }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, "description": "The amount of free and used disk space on this node's hardware.", "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - 
"legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] + "fillOpacity": 30, + "gradientMode": "opacity", + "lineInterpolation": "smooth", + "lineWidth": 2, + "showPoints": "never" }, "unit": "decbytes" - }, - "overrides": [ ] + } }, "gridPos": { "h": 8, "w": 12, "x": 0, - "y": 8 + "y": 10 }, - "id": 4, + "id": 5, "options": { "legend": { "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true + "displayMode": "list" }, "tooltip": { "mode": "multi", "sort": "desc" } }, + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, - "expr": "hardware_disk_metrics_disk_space_free_bytes{job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\"}", + "expr": "hardware_disk_metrics_disk_space_free_bytes{job=\"integrations/mongodb-atlas\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\"}", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}} - free" + "instant": false, + "legendFormat": "{{instance}} - free", + "refId": "Disk space free" }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, - "expr": "hardware_disk_metrics_disk_space_used_bytes{job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\"}", + "expr": "hardware_disk_metrics_disk_space_used_bytes{job=\"integrations/mongodb-atlas\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\"}", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}} - used" + "instant": 
false, + "legendFormat": "{{instance}} - used", + "refId": "Disk space used" } ], "title": "Disk space", @@ -311,177 +237,118 @@ }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, "description": "The disk space utilization for this node's hardware.", "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } + "fillOpacity": 30, + "gradientMode": "opacity", + "lineInterpolation": "smooth", + "lineWidth": 2, + "showPoints": "never" }, - "mappings": [ ], "max": 1, "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] - }, "unit": "percentunit" - }, - "overrides": [ ] + } }, "gridPos": { "h": 8, "w": 12, "x": 12, - "y": 8 + "y": 10 }, - "id": 5, + "id": 6, "options": { "legend": { "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true + "displayMode": "list" }, "tooltip": { "mode": "multi", "sort": "desc" } }, + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, - "expr": "(hardware_disk_metrics_disk_space_used_bytes{job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\"}) / clamp_min((hardware_disk_metrics_disk_space_free_bytes{job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\"}) + 
(hardware_disk_metrics_disk_space_used_bytes{job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\"}), 1)", + "expr": "(hardware_disk_metrics_disk_space_used_bytes{job=\"integrations/mongodb-atlas\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\"}) / clamp_min((hardware_disk_metrics_disk_space_free_bytes{job=\"integrations/mongodb-atlas\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\"}) + (hardware_disk_metrics_disk_space_used_bytes{job=\"integrations/mongodb-atlas\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\"}), 1)", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}}" + "instant": false, + "legendFormat": "{{instance}}", + "refId": "Disk space utilization by instance" } ], "title": "Disk space utilization", "type": "timeseries" }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 0, + "x": 0, + "y": 18 + }, + "id": 7, + "panels": [ ], + "title": "Network", + "type": "row" + }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, "description": "The rate of distinct requests the node has received.", "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] + "fillOpacity": 30, + "gradientMode": "opacity", + "lineInterpolation": 
"smooth", + "lineWidth": 2, + "showPoints": "never" }, "unit": "reqps" - }, - "overrides": [ ] + } }, "gridPos": { "h": 8, "w": 12, "x": 0, - "y": 16 + "y": 19 }, - "id": 6, + "id": 8, "options": { "legend": { "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true + "displayMode": "list" }, "tooltip": { "mode": "multi", "sort": "desc" } }, + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, - "expr": "rate(mongodb_network_numRequests{job=~\"$job\",cl_name=~\"$cl_name\",rs_nm=~\"$rs\",instance=~\"$instance\"}[$__rate_interval])", + "expr": "rate(mongodb_network_numRequests{job=\"integrations/mongodb-atlas\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\", rs_nm=~\"$rs_nm\"}[$__rate_interval])", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}}" + "instant": false, + "legendFormat": "{{instance}}", + "refId": "Network requests by instance" } ], "title": "Network requests", @@ -489,96 +356,62 @@ }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, "description": "The rate of slow DNS and SSL operations received by this node.", "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": 
"green", - "value": null - } - ] + "fillOpacity": 30, + "gradientMode": "opacity", + "lineInterpolation": "smooth", + "lineWidth": 2, + "showPoints": "never" }, "unit": "reqps" - }, - "overrides": [ ] + } }, "gridPos": { "h": 8, "w": 12, "x": 12, - "y": 16 + "y": 19 }, - "id": 7, + "id": 9, "options": { "legend": { "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true + "displayMode": "list" }, "tooltip": { "mode": "multi", "sort": "desc" } }, + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, - "expr": "rate(mongodb_network_numSlowDNSOperations{job=~\"$job\",cl_name=~\"$cl_name\",rs_nm=~\"$rs\",instance=~\"$instance\"}[$__rate_interval])", + "expr": "rate(mongodb_network_numSlowDNSOperations{job=\"integrations/mongodb-atlas\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\", rs_nm=~\"$rs_nm\"}[$__rate_interval])", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}} - DNS" + "instant": false, + "legendFormat": "{{instance}} - DNS", + "refId": "Slow DNS operations by instance" }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, - "expr": "rate(mongodb_network_numSlowSSLOperations{job=~\"$job\",cl_name=~\"$cl_name\",rs_nm=~\"$rs\",instance=~\"$instance\"}[$__rate_interval])", + "expr": "rate(mongodb_network_numSlowSSLOperations{job=\"integrations/mongodb-atlas\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\", rs_nm=~\"$rs_nm\"}[$__rate_interval])", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}} - SSL" + "instant": false, + "legendFormat": "{{instance}} - SSL", + "refId": "Slow SSL operations by instance" } ], "title": "Slow network requests", @@ -586,191 +419,144 @@ }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, "description": "The rate of 
bytes sent and received by the node over a network connection.", "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, + "fillOpacity": 30, + "gradientMode": "opacity", + "lineInterpolation": "smooth", + "lineWidth": 2, "showPoints": "never", - "spanNulls": false, "stacking": { - "group": "A", "mode": "normal" - }, - "thresholdsStyle": { - "mode": "off" } }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - }, "unit": "Bps" - }, - "overrides": [ ] + } }, "gridPos": { "h": 8, "w": 24, "x": 0, - "y": 24 + "y": 27 }, - "id": 8, + "id": 10, "options": { "legend": { "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true + "displayMode": "list" }, "tooltip": { "mode": "multi", "sort": "desc" } }, + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, - "expr": "rate(mongodb_network_bytesIn{job=~\"$job\",cl_name=~\"$cl_name\",rs_nm=~\"$rs\",instance=~\"$instance\"}[$__rate_interval])", + "expr": "rate(mongodb_network_bytesIn{job=\"integrations/mongodb-atlas\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\", rs_nm=~\"$rs_nm\"}[$__rate_interval])", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}} - received" + "instant": false, + "legendFormat": "{{instance}} - received", + "refId": "Network bytes received by instance" }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": 
"${datasource}" }, - "expr": "rate(mongodb_network_bytesOut{job=~\"$job\",cl_name=~\"$cl_name\",rs_nm=~\"$rs\",instance=~\"$instance\"}[$__rate_interval])", + "expr": "rate(mongodb_network_bytesOut{job=\"integrations/mongodb-atlas\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\", rs_nm=~\"$rs_nm\"}[$__rate_interval])", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}} - sent" + "instant": false, + "legendFormat": "{{instance}} - sent", + "refId": "Network bytes sent by instance" } ], "title": "Network throughput", "type": "timeseries" }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 0, + "x": 0, + "y": 35 + }, + "id": 11, + "panels": [ ], + "title": "Hardware I/O", + "type": "row" + }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, "description": "The rate of read and write I/O's processed by this node.", "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, + "fillOpacity": 30, + "gradientMode": "opacity", + "lineInterpolation": "smooth", + "lineWidth": 2, "showPoints": "never", - "spanNulls": false, "stacking": { - "group": "A", "mode": "normal" - }, - "thresholdsStyle": { - "mode": "off" } }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - }, "unit": "iops" - }, - "overrides": [ ] + } }, "gridPos": { "h": 8, "w": 24, "x": 0, - "y": 32 + "y": 36 }, - "id": 9, + "id": 12, "options": { "legend": { "calcs": [ ], - "displayMode": "list", - "placement": "bottom", 
- "showLegend": true + "displayMode": "list" }, "tooltip": { "mode": "multi", "sort": "desc" } }, + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, "expr": "rate(hardware_disk_metrics_read_count{job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\"}[$__rate_interval])", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}} - reads" + "instant": false, + "legendFormat": "{{instance}} - reads", + "refId": "Disk read operations by instance" }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, - "expr": "rate(hardware_disk_metrics_write_count{job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\"}[$__rate_interval])", + "expr": "hardware_disk_metrics_write_count{job=\"integrations/mongodb-atlas\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\"}", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}} - writes" + "instant": false, + "legendFormat": "{{instance}} - writes", + "refId": "Disk write operations by instance" } ], "title": "Hardware I/O", @@ -778,97 +564,67 @@ }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, "description": "The amount of time the node has spent waiting for read and write I/O's to process.", "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, + "fillOpacity": 30, + "gradientMode": "opacity", + "lineInterpolation": 
"smooth", + "lineWidth": 2, "showPoints": "never", - "spanNulls": false, "stacking": { - "group": "A", "mode": "normal" - }, - "thresholdsStyle": { - "mode": "off" } }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - }, "unit": "ms" - }, - "overrides": [ ] + } }, "gridPos": { "h": 8, "w": 24, "x": 0, - "y": 40 + "y": 44 }, - "id": 10, + "id": 13, "options": { "legend": { "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true + "displayMode": "list" }, "tooltip": { "mode": "multi", "sort": "desc" } }, + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, - "expr": "increase(hardware_disk_metrics_read_time_milliseconds{job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\"}[$__interval:])", + "expr": "increase(hardware_disk_metrics_read_time_milliseconds{job=\"integrations/mongodb-atlas\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\"}[$__interval:] offset -$__interval)", "format": "time_series", - "interval": "1m", - "intervalFactor": 2, - "legendFormat": "{{instance}} - read" + "instant": false, + "interval": "2m", + "legendFormat": "{{instance}} - read", + "refId": "Disk read I/O time by instance" }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, - "expr": "increase(hardware_disk_metrics_write_time_milliseconds{job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\"}[$__interval:])", + "expr": "increase(hardware_disk_metrics_write_time_milliseconds{job=\"integrations/mongodb-atlas\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\"}[$__interval:] offset -$__interval)", "format": "time_series", - "interval": "1m", - "intervalFactor": 2, - "legendFormat": "{{instance}} - write" + "instant": false, + "interval": "2m", + "legendFormat": "{{instance}} - write", + "refId": "Disk write I/O time by 
instance" } ], "title": "Hardware I/O wait time / $__interval", @@ -876,112 +632,76 @@ } ], "refresh": "1m", - "rows": [ ], - "schemaVersion": 14, - "style": "dark", + "schemaVersion": 39, "tags": [ "mongodb-atlas-mixin" ], "templating": { "list": [ { - "current": { }, - "hide": 0, - "label": "Data Source", - "name": "prometheus_datasource", - "options": [ ], + "label": "Data source", + "name": "datasource", "query": "prometheus", - "refresh": 1, "regex": "", "type": "datasource" }, { - "allValue": null, - "current": { }, + "allValue": ".+", "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, - "hide": 0, - "includeAll": false, + "includeAll": true, "label": "Job", "multi": true, "name": "job", - "options": [ ], - "query": "label_values(mongodb_network_bytesIn,job)", + "query": "label_values(mongodb_network_bytesIn{job=\"integrations/mongodb-atlas\"}, job)", "refresh": 2, - "regex": "", - "sort": 0, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false + "sort": 1, + "type": "query" }, { "allValue": ".+", - "current": { }, "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, - "hide": 0, "includeAll": true, "label": "Atlas cluster", "multi": true, "name": "cl_name", - "options": [ ], - "query": "label_values(mongodb_network_bytesIn{job=~\"$job\"},cl_name)", + "query": "label_values(mongodb_network_bytesIn{job=\"integrations/mongodb-atlas\",job=~\"$job\"}, cl_name)", "refresh": 2, - "regex": "", - "sort": 0, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false + "sort": 1, + "type": "query" }, { "allValue": ".+", - "current": { }, "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, - "hide": 0, "includeAll": true, - "label": "Replica set", + "label": "Instance", "multi": true, - "name": "rs", - "options": [ ], - "query": 
"label_values(mongodb_network_bytesIn{cl_name=~\"$cl_name\"},rs_nm)", + "name": "instance", + "query": "label_values(mongodb_network_bytesIn{job=\"integrations/mongodb-atlas\",job=~\"$job\",cl_name=~\"$cl_name\"}, instance)", "refresh": 2, - "regex": "", - "sort": 0, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false + "sort": 1, + "type": "query" }, { - "allValue": ".+", - "current": { }, "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, - "hide": 0, "includeAll": true, - "label": "Node", + "label": "Replica set", "multi": true, - "name": "instance", - "options": [ ], - "query": "label_values(mongodb_network_bytesIn{rs_nm=~\"$rs\"},instance)", + "name": "rs_nm", + "query": "label_values(mongodb_network_bytesIn, rs_nm)", "refresh": 2, - "regex": "", - "sort": 0, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false + "type": "query" } ] }, @@ -989,33 +709,7 @@ "from": "now-30m", "to": "now" }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, "timezone": "default", "title": "MongoDB Atlas performance overview", - "uid": "mongodb-atlas-performance-overview", - "version": 0 + "uid": "mongodb-atlas-performance-overview" } \ No newline at end of file diff --git a/mongodb-atlas-mixin/dashboards_out/mongodb-atlas-sharding-overview.json b/mongodb-atlas-mixin/dashboards_out/mongodb-atlas-sharding-overview.json new file mode 100644 index 000000000..eb83fae2e --- /dev/null +++ b/mongodb-atlas-mixin/dashboards_out/mongodb-atlas-sharding-overview.json @@ -0,0 +1,1113 @@ +{ + "annotations": { + "list": [ ] + }, + "description": "Overview of MongoDB Atlas sharding metrics.", + "links": [ + { + "keepTime": true, + "title": "MongoDB Atlas cluster overview", + 
"type": "link", + "url": "/d/mongodb-atlas-cluster-overview" + }, + { + "keepTime": true, + "title": "MongoDB Atlas elections overview", + "type": "link", + "url": "/d/mongodb-atlas-elections-overview" + }, + { + "keepTime": true, + "title": "MongoDB Atlas operations overview", + "type": "link", + "url": "/d/mongodb-atlas-operations-overview" + }, + { + "keepTime": true, + "title": "MongoDB Atlas performance overview", + "type": "link", + "url": "/d/mongodb-atlas-performance-overview" + } + ], + "panels": [ + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 0, + "x": 0, + "y": 0 + }, + "id": 1, + "panels": [ ], + "title": "General sharding statistics", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "description": "Number of times that a thread hit a stale config exception and triggered a metadata refresh.", + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 30, + "gradientMode": "opacity", + "lineInterpolation": "smooth", + "lineWidth": 2, + "showPoints": "never" + }, + "unit": "none" + } + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 1 + }, + "id": 2, + "options": { + "legend": { + "calcs": [ ], + "displayMode": "list" + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "v11.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "rate(mongodb_shardingStatistics_countStaleConfigErrors{job=\"integrations/mongodb-atlas\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\", rs_nm=~\"$rs_nm\"}[$__rate_interval])", + "format": "time_series", + "instant": false, + "interval": "2m", + "legendFormat": "{{instance}}", + "refId": "Stale config errors" + } + ], + "title": "Stale configs / $__interval", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "description": "Chunk migration frequency for this node.", + "fieldConfig": { + "defaults": { + 
"custom": { + "fillOpacity": 30, + "gradientMode": "opacity", + "lineInterpolation": "smooth", + "lineWidth": 2, + "showPoints": "never" + }, + "unit": "none" + } + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 1 + }, + "id": 3, + "options": { + "legend": { + "calcs": [ ], + "displayMode": "list" + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "v11.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "rate(mongodb_shardingStatistics_countRecipientMoveChunkStarted{job=\"integrations/mongodb-atlas\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\", rs_nm=~\"$rs_nm\"}[$__rate_interval])", + "format": "time_series", + "instant": false, + "interval": "2m", + "legendFormat": "{{instance}}", + "refId": "Chunk migrations started as recipient" + } + ], + "title": "Chunk migrations / $__interval", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "description": "The number of documents cloned on this node when it acted as primary for the donor and acted as primary for the recipient.", + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 30, + "gradientMode": "opacity", + "lineInterpolation": "smooth", + "lineWidth": 2, + "showPoints": "never", + "stacking": { + "mode": "normal" + } + }, + "unit": "none" + } + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 9 + }, + "id": 4, + "options": { + "legend": { + "calcs": [ ], + "displayMode": "list" + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "v11.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "rate(mongodb_shardingStatistics_countDocsClonedOnDonor{job=\"integrations/mongodb-atlas\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\", rs_nm=~\"$rs_nm\"}[$__rate_interval])", + "format": "time_series", + "instant": false, + "interval": "2m", + 
"legendFormat": "{{instance}} - donor", + "refId": "Documents cloned on donor" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "increase(mongodb_shardingStatistics_countDocsClonedOnRecipient{job=\"integrations/mongodb-atlas\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\", rs_nm=~\"$rs_nm\"}[$__interval:] offset -$__interval)", + "format": "time_series", + "instant": false, + "interval": "2m", + "legendFormat": "{{instance}} - recipient", + "refId": "Documents cloned on recipient" + } + ], + "title": "Docs cloned / $__interval", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "description": "The time taken by the catch-up and update metadata phases of a range migration, by this node.", + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 30, + "gradientMode": "opacity", + "lineInterpolation": "smooth", + "lineWidth": 2, + "showPoints": "never" + }, + "unit": "ms" + } + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 9 + }, + "id": 5, + "options": { + "legend": { + "calcs": [ ], + "displayMode": "list" + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "v11.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "increase(mongodb_shardingStatistics_totalCriticalSectionTimeMillis{job=\"integrations/mongodb-atlas\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\", rs_nm=~\"$rs_nm\"}[$__interval:] offset -$__interval)", + "format": "time_series", + "instant": false, + "interval": "2m", + "legendFormat": "{{instance}}", + "refId": "Critical section time" + } + ], + "title": "Critical section time / $__interval", + "type": "timeseries" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 0, + "x": 0, + "y": 17 + }, + "id": 6, + "panels": [ ], + "title": "Catalog cache", + "type": "row" + }, + { + "datasource": { + "type": 
"prometheus", + "uid": "${datasource}" + }, + "description": "The number of incremental and full refreshes that have started.", + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 30, + "gradientMode": "opacity", + "lineInterpolation": "smooth", + "lineWidth": 2, + "showPoints": "never", + "stacking": { + "mode": "normal" + } + }, + "unit": "none" + } + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 18 + }, + "id": 7, + "options": { + "legend": { + "calcs": [ ], + "displayMode": "list" + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "v11.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "rate(mongodb_shardingStatistics_catalogCache_countIncrementalRefreshesStarted{job=\"integrations/mongodb-atlas\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\", rs_nm=~\"$rs_nm\"}[$__rate_interval])", + "format": "time_series", + "instant": false, + "interval": "2m", + "legendFormat": "{{instance}} - incremental", + "refId": "Incremental catalog cache refreshes" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "rate(mongodb_shardingStatistics_catalogCache_countFullRefreshesStarted{job=\"integrations/mongodb-atlas\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\", rs_nm=~\"$rs_nm\"}[$__rate_interval])", + "format": "time_series", + "instant": false, + "interval": "2m", + "legendFormat": "{{instance}} - full", + "refId": "Full catalog cache refreshes" + } + ], + "title": "Refreshes started / $__interval", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "description": "The number of full and incremental refreshes that have failed.", + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 30, + "gradientMode": "opacity", + "lineInterpolation": "smooth", + "lineWidth": 2, + "showPoints": "never" + }, + "unit": "none" + } + }, + "gridPos": { 
+ "h": 8, + "w": 12, + "x": 12, + "y": 18 + }, + "id": 8, + "options": { + "legend": { + "calcs": [ ], + "displayMode": "list" + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "v11.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "rate(mongodb_shardingStatistics_catalogCache_countFailedRefreshes{job=\"integrations/mongodb-atlas\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\", rs_nm=~\"$rs_nm\"}[$__rate_interval])", + "format": "time_series", + "instant": false, + "interval": "2m", + "legendFormat": "{{instance}}", + "refId": "Failed catalog cache refreshes" + } + ], + "title": "Refreshes failed / $__interval", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "description": "The number of times that a thread hit a stale config exception for the catalog cache and triggered a metadata refresh.", + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 30, + "gradientMode": "opacity", + "lineInterpolation": "smooth", + "lineWidth": 2, + "showPoints": "never" + }, + "unit": "none" + } + }, + "gridPos": { + "h": 8, + "w": 6, + "x": 0, + "y": 26 + }, + "id": 9, + "options": { + "legend": { + "calcs": [ ], + "displayMode": "list" + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "v11.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "rate(mongodb_shardingStatistics_catalogCache_countStaleConfigErrors{job=\"integrations/mongodb-atlas\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\", rs_nm=~\"$rs_nm\"}[$__rate_interval])", + "format": "time_series", + "instant": false, + "interval": "2m", + "legendFormat": "{{instance}}", + "refId": "Catalog cache stale config errors" + } + ], + "title": "Cache stale configs / $__interval", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + 
"uid": "${datasource}" + }, + "description": "The number of database and collection entries that are currently in the catalog cache.", + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 30, + "gradientMode": "opacity", + "lineInterpolation": "smooth", + "lineWidth": 2, + "showPoints": "never" + }, + "unit": "none" + } + }, + "gridPos": { + "h": 8, + "w": 6, + "x": 6, + "y": 26 + }, + "id": 10, + "options": { + "legend": { + "calcs": [ ], + "displayMode": "list" + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "v11.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "rate(mongodb_shardingStatistics_catalogCache_numDatabaseEntries{job=\"integrations/mongodb-atlas\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\", rs_nm=~\"$rs_nm\"}[$__rate_interval])", + "format": "time_series", + "instant": false, + "interval": "2m", + "legendFormat": "{{instance}} - database", + "refId": "Database entries in catalog cache" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "rate(mongodb_shardingStatistics_catalogCache_numCollectionEntries{job=\"integrations/mongodb-atlas\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\", rs_nm=~\"$rs_nm\"}[$__rate_interval])", + "format": "time_series", + "instant": false, + "interval": "2m", + "legendFormat": "{{instance}} - collection", + "refId": "Collection entries in catalog cache" + } + ], + "title": "Cache entries / $__interval", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "description": "The amount of time that threads had to wait for a refresh of the metadata.", + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 30, + "gradientMode": "opacity", + "lineInterpolation": "smooth", + "lineWidth": 2, + "showPoints": "never" + }, + "unit": "µs" + } + }, + "gridPos": { + "h": 8, + "w": 6, + "x": 12, + "y": 
26 + }, + "id": 11, + "options": { + "legend": { + "calcs": [ ], + "displayMode": "list" + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "v11.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "rate(mongodb_shardingStatistics_catalogCache_totalRefreshWaitTimeMicros{job=\"integrations/mongodb-atlas\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\", rs_nm=~\"$rs_nm\"}[$__rate_interval])", + "format": "time_series", + "instant": false, + "interval": "2m", + "legendFormat": "{{instance}}", + "refId": "Catalog cache refresh wait time" + } + ], + "title": "Cache refresh time / $__interval", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "description": "The rate of operations that are blocked by a refresh of the catalog cache. Specific to mongos nodes found under replica set \"none\".", + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 30, + "gradientMode": "opacity", + "lineInterpolation": "smooth", + "lineWidth": 2, + "showPoints": "never" + }, + "unit": "ops" + } + }, + "gridPos": { + "h": 8, + "w": 6, + "x": 18, + "y": 26 + }, + "id": 12, + "options": { + "legend": { + "calcs": [ ], + "displayMode": "list" + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "v11.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "rate(mongodb_shardingStatistics_catalogCache_operationsBlockedByRefresh_countAllOperations{job=\"integrations/mongodb-atlas\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\", rs_nm=~\"$rs_nm\"}[$__rate_interval])", + "format": "time_series", + "instant": false, + "legendFormat": "{{instance}}", + "refId": "Operations blocked by catalog cache refresh" + } + ], + "title": "Cache operations blocked", + "type": "timeseries" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 0, 
+ "x": 0, + "y": 34 + }, + "id": 13, + "panels": [ ], + "title": "Shard operations", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "description": "The rate of CRUD operations and aggregation commands run that targeted all shards. Specific to mongos nodes found under replica set \"none\".", + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 30, + "gradientMode": "opacity", + "lineInterpolation": "smooth", + "lineWidth": 2, + "showPoints": "never" + }, + "unit": "ops" + } + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 35 + }, + "id": 14, + "options": { + "legend": { + "calcs": [ ], + "displayMode": "list", + "placement": "right" + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "v11.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "rate(mongodb_shardingStatistics_numHostsTargeted_find_allShards{job=\"integrations/mongodb-atlas\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\", rs_nm=~\"$rs_nm\"}[$__rate_interval])", + "format": "time_series", + "instant": false, + "legendFormat": "{{instance}} - find", + "refId": "Find operations targeting all shards" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "rate(mongodb_shardingStatistics_numHostsTargeted_insert_allShards{job=\"integrations/mongodb-atlas\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\", rs_nm=~\"$rs_nm\"}[$__rate_interval])", + "format": "time_series", + "instant": false, + "legendFormat": "{{instance}} - insert", + "refId": "Insert operations targeting all shards" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "rate(mongodb_shardingStatistics_numHostsTargeted_update_allShards{job=\"integrations/mongodb-atlas\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\", rs_nm=~\"$rs_nm\"}[$__rate_interval])", + "format": 
"time_series", + "instant": false, + "legendFormat": "{{instance}} - update", + "refId": "Update operations targeting all shards" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "rate(mongodb_shardingStatistics_numHostsTargeted_delete_allShards{job=\"integrations/mongodb-atlas\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\", rs_nm=~\"$rs_nm\"}[$__rate_interval])", + "format": "time_series", + "instant": false, + "legendFormat": "{{instance}} - delete", + "refId": "Delete operations targeting all shards" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "rate(mongodb_shardingStatistics_numHostsTargeted_aggregate_allShards{job=\"integrations/mongodb-atlas\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\", rs_nm=~\"$rs_nm\"}[$__rate_interval])", + "format": "time_series", + "instant": false, + "legendFormat": "{{instance}} - aggregate", + "refId": "Aggregate operations targeting all shards" + } + ], + "title": "All shards", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "description": "The rate of CRUD operations and aggregation commands run that targeted more than 1 shard. 
Specific to mongos nodes found under replica set \"none\".", + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 30, + "gradientMode": "opacity", + "lineInterpolation": "smooth", + "lineWidth": 2, + "showPoints": "never" + }, + "unit": "ops" + } + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 35 + }, + "id": 15, + "options": { + "legend": { + "calcs": [ ], + "displayMode": "list", + "placement": "right" + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "v11.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "rate(mongodb_shardingStatistics_numHostsTargeted_find_manyShards{job=\"integrations/mongodb-atlas\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\", rs_nm=~\"$rs_nm\"}[$__rate_interval])", + "format": "time_series", + "instant": false, + "legendFormat": "{{instance}} - find", + "refId": "Find operations targeting many shards" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "rate(mongodb_shardingStatistics_numHostsTargeted_insert_manyShards{job=\"integrations/mongodb-atlas\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\", rs_nm=~\"$rs_nm\"}[$__rate_interval])", + "format": "time_series", + "instant": false, + "legendFormat": "{{instance}} - insert", + "refId": "Insert operations targeting many shards" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "rate(mongodb_shardingStatistics_numHostsTargeted_update_manyShards{job=\"integrations/mongodb-atlas\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\", rs_nm=~\"$rs_nm\"}[$__rate_interval])", + "format": "time_series", + "instant": false, + "legendFormat": "{{instance}} - update", + "refId": "Update operations targeting many shards" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": 
"rate(mongodb_shardingStatistics_numHostsTargeted_delete_manyShards{job=\"integrations/mongodb-atlas\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\", rs_nm=~\"$rs_nm\"}[$__rate_interval])", + "format": "time_series", + "instant": false, + "legendFormat": "{{instance}} - delete", + "refId": "Delete operations targeting many shards" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "rate(mongodb_shardingStatistics_numHostsTargeted_aggregate_manyShards{job=\"integrations/mongodb-atlas\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\", rs_nm=~\"$rs_nm\"}[$__rate_interval])", + "format": "time_series", + "instant": false, + "legendFormat": "{{instance}} - aggregate", + "refId": "Aggregate operations targeting many shards" + } + ], + "title": "Many shards", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "description": "The rate of CRUD operations and aggregation commands run that targeted 1 shard. 
Specific to mongos nodes found under replica set \"none\".", + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 30, + "gradientMode": "opacity", + "lineInterpolation": "smooth", + "lineWidth": 2, + "showPoints": "never" + }, + "unit": "ops" + } + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 43 + }, + "id": 16, + "options": { + "legend": { + "calcs": [ ], + "displayMode": "list", + "placement": "right" + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "v11.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "rate(mongodb_shardingStatistics_numHostsTargeted_find_oneShard{job=\"integrations/mongodb-atlas\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\", rs_nm=~\"$rs_nm\"}[$__rate_interval])", + "format": "time_series", + "instant": false, + "legendFormat": "{{instance}} - find", + "refId": "Find operations targeting one shard" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "rate(mongodb_shardingStatistics_numHostsTargeted_insert_oneShard{job=\"integrations/mongodb-atlas\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\", rs_nm=~\"$rs_nm\"}[$__rate_interval])", + "format": "time_series", + "instant": false, + "legendFormat": "{{instance}} - insert", + "refId": "Insert operations targeting one shard" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "rate(mongodb_shardingStatistics_numHostsTargeted_update_oneShard{job=\"integrations/mongodb-atlas\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\", rs_nm=~\"$rs_nm\"}[$__rate_interval])", + "format": "time_series", + "instant": false, + "legendFormat": "{{instance}} - update", + "refId": "Update operations targeting one shard" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": 
"rate(mongodb_shardingStatistics_numHostsTargeted_delete_oneShard{job=\"integrations/mongodb-atlas\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\", rs_nm=~\"$rs_nm\"}[$__rate_interval])", + "format": "time_series", + "instant": false, + "legendFormat": "{{instance}} - delete", + "refId": "Delete operations targeting one shard" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "rate(mongodb_shardingStatistics_numHostsTargeted_aggregate_oneShard{job=\"integrations/mongodb-atlas\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\", rs_nm=~\"$rs_nm\"}[$__rate_interval])", + "format": "time_series", + "instant": false, + "legendFormat": "{{instance}} - aggregate", + "refId": "Aggregate operations targeting one shard" + } + ], + "title": "One shard", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "description": "The rate of CRUD operations and aggregation commands run on an unsharded collection. 
Specific to mongos nodes found under replica set \"none\".", + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 30, + "gradientMode": "opacity", + "lineInterpolation": "smooth", + "lineWidth": 2, + "showPoints": "never" + }, + "unit": "ops" + } + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 43 + }, + "id": 17, + "options": { + "legend": { + "calcs": [ ], + "displayMode": "list", + "placement": "right" + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "v11.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "rate(mongodb_shardingStatistics_numHostsTargeted_find_unsharded{job=\"integrations/mongodb-atlas\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\", rs_nm=~\"$rs_nm\"}[$__rate_interval])", + "format": "time_series", + "instant": false, + "legendFormat": "{{instance}} - find", + "refId": "Find operations on unsharded collections" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "rate(mongodb_shardingStatistics_numHostsTargeted_insert_unsharded{job=\"integrations/mongodb-atlas\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\", rs_nm=~\"$rs_nm\"}[$__rate_interval])", + "format": "time_series", + "instant": false, + "legendFormat": "{{instance}} - insert", + "refId": "Insert operations on unsharded collections" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "rate(mongodb_shardingStatistics_numHostsTargeted_update_unsharded{job=\"integrations/mongodb-atlas\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\", rs_nm=~\"$rs_nm\"}[$__rate_interval])", + "format": "time_series", + "instant": false, + "legendFormat": "{{instance}} - update", + "refId": "Update operations on unsharded collections" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": 
"rate(mongodb_shardingStatistics_numHostsTargeted_delete_unsharded{job=\"integrations/mongodb-atlas\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\", rs_nm=~\"$rs_nm\"}[$__rate_interval])", + "format": "time_series", + "instant": false, + "legendFormat": "{{instance}} - delete", + "refId": "Delete operations on unsharded collections" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "rate(mongodb_shardingStatistics_numHostsTargeted_aggregate_unsharded{job=\"integrations/mongodb-atlas\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\", rs_nm=~\"$rs_nm\"}[$__rate_interval])", + "format": "time_series", + "instant": false, + "legendFormat": "{{instance}} - aggregate", + "refId": "Aggregate operations on unsharded collections" + } + ], + "title": "Unsharded", + "type": "timeseries" + } + ], + "refresh": "1m", + "schemaVersion": 39, + "tags": [ + "mongodb-atlas-mixin" + ], + "templating": { + "list": [ + { + "label": "Data source", + "name": "datasource", + "query": "prometheus", + "regex": "", + "type": "datasource" + }, + { + "allValue": ".+", + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "includeAll": true, + "label": "Job", + "multi": true, + "name": "job", + "query": "label_values(mongodb_network_bytesIn{job=\"integrations/mongodb-atlas\"}, job)", + "refresh": 2, + "sort": 1, + "type": "query" + }, + { + "allValue": ".+", + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "includeAll": true, + "label": "Atlas cluster", + "multi": true, + "name": "cl_name", + "query": "label_values(mongodb_network_bytesIn{job=\"integrations/mongodb-atlas\",job=~\"$job\"}, cl_name)", + "refresh": 2, + "sort": 1, + "type": "query" + }, + { + "allValue": ".+", + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "includeAll": true, + "label": "Instance", + "multi": true, + "name": "instance", + "query": 
"label_values(mongodb_network_bytesIn{job=\"integrations/mongodb-atlas\",job=~\"$job\",cl_name=~\"$cl_name\"}, instance)", + "refresh": 2, + "sort": 1, + "type": "query" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "includeAll": true, + "label": "Replica set", + "multi": true, + "name": "rs_nm", + "query": "label_values(mongodb_network_bytesIn, rs_nm)", + "refresh": 2, + "type": "query" + } + ] + }, + "time": { + "from": "now-30m", + "to": "now" + }, + "timezone": "default", + "title": "MongoDB Atlas sharding overview", + "uid": "mongodb-atlas-sharding-overview" + } \ No newline at end of file diff --git a/mongodb-atlas-mixin/g.libsonnet b/mongodb-atlas-mixin/g.libsonnet new file mode 100644 index 000000000..ba90fd9b0 --- /dev/null +++ b/mongodb-atlas-mixin/g.libsonnet @@ -0,0 +1,3 @@ +// grafonnet must be imported with "g" alias +local g = import './vendor/grafonnet-v11.0.0/main.libsonnet'; +g diff --git a/mongodb-atlas-mixin/jsonnetfile.json b/mongodb-atlas-mixin/jsonnetfile.json index 65cebf84b..63d7dc40b 100644 --- a/mongodb-atlas-mixin/jsonnetfile.json +++ b/mongodb-atlas-mixin/jsonnetfile.json @@ -1,15 +1,33 @@ { - "version": 1, - "dependencies": [ - { - "source": { - "git": { - "remote": "https://github.com/grafana/grafonnet-lib.git", - "subdir": "grafonnet" - } - }, - "version": "master" + "version": 1, + "dependencies": [ + { + "source": { + "git": { + "remote": "https://github.com/grafana/jsonnet-libs.git", + "subdir": "common-lib" } - ], - "legacyImports": true -} + }, + "version": "master" + }, + { + "source": { + "git": { + "remote": "https://github.com/grafana/grafonnet.git", + "subdir": "gen/grafonnet-v11.0.0" + } + }, + "version": "main" + }, + { + "source": { + "git": { + "remote": "https://github.com/grafana/jsonnet-libs.git", + "subdir": "grafana-cloud-integration-utils" + } + }, + "version": "master" + } + ], + "legacyImports": true +} \ No newline at end of file diff --git
a/mongodb-atlas-mixin/links.libsonnet b/mongodb-atlas-mixin/links.libsonnet new file mode 100644 index 000000000..529632852 --- /dev/null +++ b/mongodb-atlas-mixin/links.libsonnet @@ -0,0 +1,31 @@ +local g = import './g.libsonnet'; + +{ + local link = g.dashboard.link, /* shorthand for the grafonnet dashboard-link builders used below */ + new(this): { /* Builds the dashboard link objects. Reads this.config.uid, this.config.enableShardingOverview and this.config.dashboardTags. Every per-dashboard URL is '/d/' followed by the configured uid and a '-<name>-overview' suffix, with keepTime enabled. */ + clusterOverview: + link.link.new('MongoDB Atlas cluster overview', '/d/' + this.config.uid + '-cluster-overview') + + link.link.options.withKeepTime(true), + electionsOverview: + link.link.new('MongoDB Atlas elections overview', '/d/' + this.config.uid + '-elections-overview') + + link.link.options.withKeepTime(true), + operationsOverview: + link.link.new('MongoDB Atlas operations overview', '/d/' + this.config.uid + '-operations-overview') + + link.link.options.withKeepTime(true), + performanceOverview: + link.link.new('MongoDB Atlas performance overview', '/d/' + this.config.uid + '-performance-overview') + + link.link.options.withKeepTime(true), + } + if this.config.enableShardingOverview then { /* the sharding link is only built when enableShardingOverview is truthy; the else branch keeps the key but sets it to an empty object */ + shardingOverview: + link.link.new('MongoDB Atlas sharding overview', '/d/' + this.config.uid + '-sharding-overview') + + link.link.options.withKeepTime(true), + } else { + shardingOverview: {}, + } + { /* dropdown link to all dashboards matching this.config.dashboardTags, keeping variables and time range */ + otherDashboards: + link.dashboards.new('All dashboards', this.config.dashboardTags) + + link.dashboards.options.withIncludeVars(true) + + link.dashboards.options.withKeepTime(true) + + link.dashboards.options.withAsDropdown(true), + }, +} diff --git a/mongodb-atlas-mixin/main.libsonnet b/mongodb-atlas-mixin/main.libsonnet new file mode 100644 index 000000000..a60bfd42c --- /dev/null +++ b/mongodb-atlas-mixin/main.libsonnet @@ -0,0 +1,49 @@ +local alerts = import './alerts.libsonnet'; +local config = import './config.libsonnet'; +local dashboards = import './dashboards.libsonnet'; +local links = import './links.libsonnet'; +local panels = import './panels.libsonnet'; +local rows = import './rows.libsonnet'; +local commonlib = import 'common-lib/common/main.libsonnet'; + +{ + 
withConfigMixin(config): { /* returns an object that merges the given overrides into the config field; the parameter shadows the imported config inside this method */ + config+: config, + }, + + new(): { /* Assembles the library object: config, signals, grafana resources and prometheus resources. */ + + local this = self, + config: config, /* here config is the object imported from ./config.libsonnet */ + + signals: + { /* one entry per key of this.config.signals, unmarshalled by common-lib with this.config.metricsSource passed as the type */ + [sig]: commonlib.signals.unmarshallJsonMulti( + this.config.signals[sig], + type=this.config.metricsSource + ) + for sig in std.objectFields(this.config.signals) + }, + + grafana: { /* variables are built by common-lib from the config selectors and labels; links, panels, dashboards and rows come from the sibling libsonnet files and each receives this object */ + variables: commonlib.variables.new( + filteringSelector=this.config.filteringSelector, + groupLabels=this.config.groupLabels, + instanceLabels=this.config.instanceLabels, + varMetric='mongodb_network_bytesIn', + customAllValue='.+', + enableLokiLogs=this.config.enableLokiLogs, + ), + annotations: {}, + links: links.new(this), + panels: panels.new(this), + dashboards: dashboards.new(this), + rows: rows.new(this), + }, + + prometheus: { /* alerts come from ./alerts.libsonnet; recordingRules is left empty */ + alerts: alerts.new(this), + recordingRules: {}, + }, + }, +} diff --git a/mongodb-atlas-mixin/mixin.libsonnet b/mongodb-atlas-mixin/mixin.libsonnet index 4d987cf31..d4ebe073e 100644 --- a/mongodb-atlas-mixin/mixin.libsonnet +++ b/mongodb-atlas-mixin/mixin.libsonnet @@ -1,3 +1,31 @@ -(import 'dashboards/dashboards.libsonnet') + -(import 'alerts/alerts.libsonnet') + -(import 'config.libsonnet') +local mongodbAtlaslib = import './main.libsonnet'; +local util = import 'grafana-cloud-integration-utils/util.libsonnet'; + + +local mongodbAtlas = /* library instance with enableLokiLogs overridden to false */ + mongodbAtlaslib.new() + + mongodbAtlaslib.withConfigMixin({ + enableLokiLogs: false, + }); + +local optional_labels = { /* display labels for the rs_nm and cl_name variables, passed to util.patch_variables below */ + rs_nm+: { + label: 'Replica set', + }, + cl_name+: { + label: 'Atlas cluster', + }, +}; + +// populate monitoring-mixin: +{ + grafanaDashboards+:: { /* every dashboard produced by the library, merged with the result of util.patch_variables for that dashboard */ + [fname]: + local dashboard = mongodbAtlas.grafana.dashboards[fname]; + dashboard + util.patch_variables(dashboard, optional_labels) + + for fname in std.objectFields(mongodbAtlas.grafana.dashboards) + }, + prometheusAlerts+:: mongodbAtlas.prometheus.alerts, + prometheusRules+:: mongodbAtlas.prometheus.recordingRules, +} diff --git a/mongodb-atlas-mixin/panels.libsonnet b/mongodb-atlas-mixin/panels.libsonnet new file mode 
100644 index 000000000..82cd14c1c --- /dev/null +++ b/mongodb-atlas-mixin/panels.libsonnet @@ -0,0 +1,1448 @@ +local g = import './g.libsonnet'; +local commonlib = import 'common-lib/common/main.libsonnet'; + +{ + new(this): { + local signals = this.signals, + + // + // Inventory table panels (for shard, config, mongos nodes) + // + + shardNodesTable: + g.panel.table.new('Shard nodes') + + g.panel.table.panelOptions.withDescription('An inventory table for shard nodes in the environment.') + + g.panel.table.queryOptions.withTargets([ + signals.cluster.shardNodeRepresentativeMetric.asTableTarget(), + ]) + + g.panel.table.queryOptions.withTransformations([ + { id: 'reduce', options: { labelsToFields: true, reducers: ['lastNotNull'] } }, + { id: 'organize', options: { + excludeByName: { Field: true, 'Last *': true, __name__: true, job: true, org_id: true, process_port: true }, + indexByName: { Field: 6, 'Last *': 11, __name__: 7, cl_name: 1, cl_role: 2, group_id: 0, instance: 3, job: 8, org_id: 9, process_port: 10, rs_nm: 4, rs_state: 5 }, + renameByName: { cl_name: 'Cluster', cl_role: 'Role', group_id: 'Group', instance: 'Node', rs_nm: 'Replica set', rs_state: 'State' }, + } }, + { id: 'filterByValue', options: { filters: [{ config: { id: 'equal', options: { value: 'shardsvr' } }, fieldName: 'Role' }], match: 'all', type: 'include' } }, + ]) + + g.panel.table.standardOptions.color.withMode('thresholds') + + g.panel.table.standardOptions.withMappings([ + g.panel.table.standardOptions.mapping.ValueMap.withType() + + g.panel.table.standardOptions.mapping.ValueMap.withOptions({ + '1': { index: 0, text: 'Primary' }, + '2': { index: 1, text: 'Secondary' }, + }), + ]) + + g.panel.table.standardOptions.withOverrides([ + g.panel.table.fieldOverride.byName.new('cl_role') + + g.panel.table.fieldOverride.byName.withProperty('custom.width', 150), + g.panel.table.fieldOverride.byName.new('rs_state') + + g.panel.table.fieldOverride.byName.withProperty('custom.width', 100), + 
g.panel.table.fieldOverride.byName.new('rs_nm') + + g.panel.table.fieldOverride.byName.withProperty('custom.width', 250), + g.panel.table.fieldOverride.byName.new('cl_name') + + g.panel.table.fieldOverride.byName.withProperty('custom.width', 300), + g.panel.table.fieldOverride.byName.new('group_id') + + g.panel.table.fieldOverride.byName.withProperty('custom.width', 300), + g.panel.table.fieldOverride.byName.new('State') + + g.panel.table.fieldOverride.byName.withProperty('custom.cellOptions', { type: 'color-text' }) + + g.panel.table.fieldOverride.byName.withProperty('mappings', [ + { options: { '1': { color: 'green', index: 0, text: 'Primary' }, '2': { color: 'yellow', index: 1, text: 'Secondary' } }, type: 'value' }, + ]), + ]), + + configNodesTable: + g.panel.table.new('Config nodes') + + g.panel.table.panelOptions.withDescription('An inventory table for config nodes in the environment.') + + g.panel.table.queryOptions.withTargets([signals.cluster.configNodeRepresentativeMetric.asTableTarget()]) + + g.panel.table.queryOptions.withTransformations([ + { id: 'reduce', options: { labelsToFields: true, reducers: ['lastNotNull'] } }, + { id: 'organize', options: { + excludeByName: { Field: true, 'Last *': true, __name__: true, job: true, org_id: true, process_port: true }, + indexByName: { Field: 6, 'Last *': 11, __name__: 7, cl_name: 1, cl_role: 2, group_id: 0, instance: 3, job: 8, org_id: 9, process_port: 10, rs_nm: 4, rs_state: 5 }, + renameByName: { cl_name: 'Cluster', cl_role: 'Role', group_id: 'Group', instance: 'Node', rs_nm: 'Replica set', rs_state: 'State' }, + } }, + { id: 'filterByValue', options: { filters: [{ config: { id: 'equal', options: { value: 'configsvr' } }, fieldName: 'Role' }], match: 'all', type: 'include' } }, + ]) + + g.panel.table.standardOptions.color.withMode('thresholds') + + g.panel.table.standardOptions.withMappings([ + g.panel.table.standardOptions.mapping.ValueMap.withType() + + 
g.panel.table.standardOptions.mapping.ValueMap.withOptions({ + '1': { index: 0, text: 'Primary' }, + '2': { index: 1, text: 'Secondary' }, + }), + ]) + + g.panel.table.standardOptions.withOverrides([ + g.panel.table.fieldOverride.byName.new('cl_role') + + g.panel.table.fieldOverride.byName.withProperty('custom.width', 150), + g.panel.table.fieldOverride.byName.new('rs_state') + + g.panel.table.fieldOverride.byName.withProperty('custom.width', 100), + g.panel.table.fieldOverride.byName.new('rs_nm') + + g.panel.table.fieldOverride.byName.withProperty('custom.width', 250), + g.panel.table.fieldOverride.byName.new('cl_name') + + g.panel.table.fieldOverride.byName.withProperty('custom.width', 300), + g.panel.table.fieldOverride.byName.new('group_id') + + g.panel.table.fieldOverride.byName.withProperty('custom.width', 300), + g.panel.table.fieldOverride.byName.new('State') + + g.panel.table.fieldOverride.byName.withProperty('custom.cellOptions', { type: 'color-text' }) + + g.panel.table.fieldOverride.byName.withProperty('mappings', [ + { options: { '1': { color: 'green', index: 0, text: 'Primary' }, '2': { color: 'yellow', index: 1, text: 'Secondary' } }, type: 'value' }, + ]), + ]), + + mongosNodesTable: + g.panel.table.new('mongos nodes') + + g.panel.table.panelOptions.withDescription('An inventory table for mongos nodes in the environment.') + + g.panel.table.queryOptions.withTargets([ + signals.cluster.mongosNodeRepresentativeMetric.asTableTarget(), + ]) + + g.panel.table.queryOptions.withTransformations([ + { id: 'reduce', options: { labelsToFields: true, reducers: ['lastNotNull'] } }, + { id: 'organize', options: { + excludeByName: { Field: true, 'Last *': true, __name__: true, job: true, org_id: true, process_port: true, rs_state: true }, + indexByName: { Field: 6, 'Last *': 11, __name__: 7, cl_name: 1, cl_role: 2, group_id: 0, instance: 3, job: 8, org_id: 9, process_port: 10, rs_nm: 4, rs_state: 5 }, + renameByName: { cl_name: 'Cluster', cl_role: 'Role', 
group_id: 'Group', instance: 'Node', rs_nm: 'Replica set' }, + } }, + { id: 'filterByValue', options: { filters: [{ config: { id: 'equal', options: { value: 'mongos' } }, fieldName: 'Role' }], match: 'all', type: 'include' } }, + ]) + + g.panel.table.standardOptions.color.withMode('thresholds') + + g.panel.table.standardOptions.withMappings([ + g.panel.table.standardOptions.mapping.ValueMap.withType() + + g.panel.table.standardOptions.mapping.ValueMap.withOptions({ + '1': { index: 0, text: 'Primary' }, + '2': { index: 1, text: 'Secondary' }, + }), + ]) + + g.panel.table.standardOptions.withOverrides([ + g.panel.table.fieldOverride.byName.new('cl_role') + + g.panel.table.fieldOverride.byName.withProperty('custom.width', 150), + g.panel.table.fieldOverride.byName.new('rs_state') + + g.panel.table.fieldOverride.byName.withProperty('custom.width', 100), + g.panel.table.fieldOverride.byName.new('rs_nm') + + g.panel.table.fieldOverride.byName.withProperty('custom.width', 250), + g.panel.table.fieldOverride.byName.new('cl_name') + + g.panel.table.fieldOverride.byName.withProperty('custom.width', 300), + g.panel.table.fieldOverride.byName.new('group_id') + + g.panel.table.fieldOverride.byName.withProperty('custom.width', 300), + ]), + + // + // Performance section panels + // + + hardwareIO: + commonlib.panels.generic.timeSeries.base.new('Hardware I/O', targets=[ + signals.cluster.diskReadCount.asTarget(), + signals.cluster.diskWriteCount.asTarget(), + ]) + + g.panel.timeSeries.panelOptions.withDescription("The number of read and write I/O's processed.") + + g.panel.timeSeries.standardOptions.withUnit('iops') + + g.panel.timeSeries.options.tooltip.withMode('multi') + + g.panel.timeSeries.options.tooltip.withSort('desc') + + g.panel.timeSeries.fieldConfig.defaults.custom.stacking.withMode('normal'), + + hardwareIOWaitTime: + commonlib.panels.generic.timeSeries.base.new('Hardware I/O wait time / $__interval', targets=[ + signals.cluster.diskReadTime.asTarget() + + 
g.query.prometheus.withInterval('2m'), + signals.cluster.diskWriteTime.asTarget() + + g.query.prometheus.withInterval('2m'), + ]) + + g.panel.timeSeries.panelOptions.withDescription('The amount of time spent waiting for I/O requests.') + + g.panel.timeSeries.standardOptions.withUnit('ms') + + g.panel.timeSeries.options.tooltip.withMode('multi') + + g.panel.timeSeries.options.tooltip.withSort('desc') + + + g.panel.timeSeries.fieldConfig.defaults.custom.stacking.withMode('normal'), + + hardwareCPUInterruptServiceTime: + commonlib.panels.generic.timeSeries.base.new('Hardware CPU interrupt service time / $__interval', targets=[ + signals.cluster.cpuIrqTime.asTarget() + + g.query.prometheus.withInterval('2m'), + ]) + + g.panel.timeSeries.panelOptions.withDescription('The amount of time spent servicing CPU interrupts.') + + g.panel.timeSeries.standardOptions.withUnit('ms') + + g.panel.timeSeries.options.tooltip.withMode('multi') + + g.panel.timeSeries.options.tooltip.withSort('desc'), + + memoryUsed: + commonlib.panels.generic.timeSeries.base.new('Memory used', targets=[ + signals.cluster.memoryResident.asTarget(), + signals.cluster.memoryVirtual.asTarget(), + ]) + + g.panel.timeSeries.panelOptions.withDescription('The amount of RAM and virtual memory being used.') + + g.panel.timeSeries.standardOptions.withUnit('mbytes') + + g.panel.timeSeries.options.tooltip.withMode('multi') + + g.panel.timeSeries.options.tooltip.withSort('desc'), + + diskSpaceUsage: + commonlib.panels.generic.timeSeries.base.new('Disk space usage', targets=[ + signals.cluster.diskSpaceUtilization.asTarget(), + ]) + + g.panel.timeSeries.panelOptions.withDescription('The percentage of hardware space used.') + + g.panel.timeSeries.standardOptions.withUnit('percentunit') + + g.panel.timeSeries.standardOptions.withMin(0) + + g.panel.timeSeries.standardOptions.withMax(1) + + g.panel.timeSeries.options.tooltip.withMode('multi') + + g.panel.timeSeries.options.tooltip.withSort('desc'), + + networkRequests: + 
commonlib.panels.generic.timeSeries.base.new('Network requests', targets=[ + signals.cluster.networkRequests.asTarget(), + ]) + + g.panel.timeSeries.panelOptions.withDescription('The number of distinct requests that the server has received.') + + g.panel.timeSeries.standardOptions.withUnit('reqps') + + g.panel.timeSeries.options.tooltip.withMode('multi') + + g.panel.timeSeries.options.tooltip.withSort('desc'), + + networkThroughput: + commonlib.panels.generic.timeSeries.base.new('Network throughput', targets=[ + signals.cluster.networkBytesIn.asTarget(), + signals.cluster.networkBytesOut.asTarget(), + ]) + + g.panel.timeSeries.panelOptions.withDescription('The number of bytes sent and received over network connections.') + + g.panel.timeSeries.standardOptions.withUnit('Bps') + + g.panel.timeSeries.options.tooltip.withMode('multi') + + g.panel.timeSeries.options.tooltip.withSort('desc') + + + g.panel.timeSeries.fieldConfig.defaults.custom.stacking.withMode('normal'), + + slowRequests: + commonlib.panels.generic.timeSeries.base.new('Slow requests', targets=[ + signals.cluster.networkSlowDNS.asTarget(), + signals.cluster.networkSlowSSL.asTarget(), + ]) + + g.panel.timeSeries.panelOptions.withDescription('The rate of DNS and SSL operations that took longer than 1 second.') + + g.panel.timeSeries.standardOptions.withUnit('reqps') + + g.panel.timeSeries.options.tooltip.withMode('multi') + + g.panel.timeSeries.options.tooltip.withSort('desc'), + + // + // Operations section panels + // + + connections: + commonlib.panels.generic.timeSeries.base.new('Connections', targets=[ + signals.cluster.connectionsCreated.asTarget(), + ]) + + g.panel.timeSeries.panelOptions.withDescription('The rate of incoming connections to the cluster created.') + + g.panel.timeSeries.standardOptions.withUnit('conns/s') + + g.panel.timeSeries.options.tooltip.withMode('multi') + + g.panel.timeSeries.options.tooltip.withSort('desc'), + + readwriteOperations: + 
commonlib.panels.generic.timeSeries.base.new('Read/Write operations', targets=[ + signals.cluster.opLatenciesReadsOps.asTarget(), + signals.cluster.opLatenciesWritesOps.asTarget(), + ]) + + g.panel.timeSeries.panelOptions.withDescription('The number of read and write operations.') + + g.panel.timeSeries.standardOptions.withUnit('ops') + + g.panel.timeSeries.options.tooltip.withMode('multi') + + g.panel.timeSeries.options.tooltip.withSort('desc') + + + g.panel.timeSeries.fieldConfig.defaults.custom.stacking.withMode('normal'), + + operations: + g.panel.pieChart.new('Operations') + + g.panel.pieChart.panelOptions.withDescription('The number of insert, query, update, and delete operations.') + + g.panel.pieChart.queryOptions.withTargets([ + signals.cluster.opCountersInsert.asTarget() + + g.query.prometheus.withInterval('2m'), + signals.cluster.opCountersQuery.asTarget() + + g.query.prometheus.withInterval('2m'), + signals.cluster.opCountersUpdate.asTarget() + + g.query.prometheus.withInterval('2m'), + signals.cluster.opCountersDelete.asTarget() + + g.query.prometheus.withInterval('2m'), + ]) + + g.panel.pieChart.options.reduceOptions.withCalcs(['lastNotNull']) + + g.panel.pieChart.options.legend.withDisplayMode('table') + + g.panel.pieChart.options.legend.withPlacement('bottom') + + g.panel.pieChart.options.legend.withValues(['value']) + + g.panel.pieChart.options.tooltip.withMode('multi') + + g.panel.pieChart.options.tooltip.withSort('desc'), + + readwriteLatency: + commonlib.panels.generic.timeSeries.base.new('Read/Write latency / $__interval', targets=[ + signals.cluster.opLatenciesReadsLatency.asTarget() + + g.query.prometheus.withInterval('2m'), + signals.cluster.opLatenciesWritesLatency.asTarget() + + g.query.prometheus.withInterval('2m'), + ]) + + g.panel.timeSeries.panelOptions.withDescription('The latency for read and write operations.') + + g.panel.timeSeries.standardOptions.withUnit('µs') + + g.panel.timeSeries.options.tooltip.withMode('multi') + + 
g.panel.timeSeries.options.tooltip.withSort('desc') + + + g.panel.timeSeries.fieldConfig.defaults.custom.stacking.withMode('normal'), + + // + // Locks section panels + // + + currentQueue: + commonlib.panels.generic.timeSeries.base.new('Current queue', targets=[ + signals.cluster.globalLockQueueReaders.asTarget(), + signals.cluster.globalLockQueueWriters.asTarget(), + ]) + + g.panel.timeSeries.panelOptions.withDescription('The number of reads and writes queued because of a lock.') + + g.panel.timeSeries.options.tooltip.withMode('multi') + + g.panel.timeSeries.options.tooltip.withSort('desc') + + + g.panel.timeSeries.fieldConfig.defaults.custom.stacking.withMode('normal'), + + activeClientOperations: + commonlib.panels.generic.timeSeries.base.new('Active client operations', targets=[ + signals.cluster.globalLockActiveReaders.asTarget(), + signals.cluster.globalLockActiveWriters.asTarget(), + ]) + + g.panel.timeSeries.panelOptions.withDescription('The number of reads and writes being actively performed by connected clients.') + + g.panel.timeSeries.options.tooltip.withMode('multi') + + g.panel.timeSeries.options.tooltip.withSort('desc') + + + g.panel.timeSeries.fieldConfig.defaults.custom.stacking.withMode('normal'), + + databaseDeadlocks: + commonlib.panels.generic.timeSeries.base.new('Database deadlocks / $__interval', targets=[ + signals.cluster.dbDeadlockExclusive.asTarget() + + g.query.prometheus.withInterval('2m'), + signals.cluster.dbDeadlockIntentExclusive.asTarget() + + g.query.prometheus.withInterval('2m'), + signals.cluster.dbDeadlockShared.asTarget() + + g.query.prometheus.withInterval('2m'), + signals.cluster.dbDeadlockIntentShared.asTarget() + + g.query.prometheus.withInterval('2m'), + ]) + + g.panel.timeSeries.panelOptions.withDescription('The number of deadlocks for database level locks.') + + g.panel.timeSeries.options.legend.withPlacement('right') + + g.panel.timeSeries.options.tooltip.withMode('multi') + + 
g.panel.timeSeries.options.tooltip.withSort('desc') + + + g.panel.timeSeries.fieldConfig.defaults.custom.stacking.withMode('normal'), + + databaseWaitsAcquiringLock: + commonlib.panels.generic.timeSeries.base.new('Database waits acquiring lock / $__interval', targets=[ + signals.cluster.dbWaitCountExclusive.asTarget() + + g.query.prometheus.withInterval('2m'), + signals.cluster.dbWaitCountIntentExclusive.asTarget() + + g.query.prometheus.withInterval('2m'), + signals.cluster.dbWaitCountShared.asTarget() + + g.query.prometheus.withInterval('2m'), + signals.cluster.dbWaitCountIntentShared.asTarget() + + g.query.prometheus.withInterval('2m'), + ]) + + g.panel.timeSeries.panelOptions.withDescription('The number of times lock acquisitions encounter waits for database level locks.') + + g.panel.timeSeries.options.legend.withPlacement('right') + + g.panel.timeSeries.options.tooltip.withMode('multi') + + g.panel.timeSeries.options.tooltip.withSort('desc') + + + g.panel.timeSeries.fieldConfig.defaults.custom.stacking.withMode('normal'), + + // + // Elections section panels + // + + stepUpElectionsCalled: + commonlib.panels.generic.timeSeries.base.new('Step-up elections / $__interval', targets=[ + signals.elections.stepUpCmdCalled.asTarget() + + g.query.prometheus.withInterval('2m'), + signals.elections.stepUpCmdSuccessful.asTarget() + + g.query.prometheus.withInterval('2m'), + ]) + + g.panel.timeSeries.panelOptions.withDescription('The number of elections called and elections won by the node when the primary stepped down.') + + g.panel.timeSeries.options.legend.withPlacement('right') + + g.panel.timeSeries.options.legend.withAsTable(true) + + g.panel.timeSeries.standardOptions.withUnit('none') + + g.panel.timeSeries.options.tooltip.withMode('multi') + + g.panel.timeSeries.options.tooltip.withSort('desc'), + + priorityElections: + commonlib.panels.generic.timeSeries.base.new('Priority elections / $__interval', targets=[ + signals.elections.priorityTakeoverCalled.asTarget() + 
+ g.query.prometheus.withInterval('2m'), + signals.elections.priorityTakeoverSuccessful.asTarget() + + g.query.prometheus.withInterval('2m'), + ]) + + g.panel.timeSeries.options.legend.withPlacement('right') + + g.panel.timeSeries.options.legend.withAsTable(true) + + g.panel.timeSeries.panelOptions.withDescription('The number of elections called and elections won by the node when it had a higher priority than the primary node.') + + g.panel.timeSeries.standardOptions.withUnit('none') + + g.panel.timeSeries.options.tooltip.withMode('multi') + + g.panel.timeSeries.options.tooltip.withSort('desc'), + + takeoverElections: + commonlib.panels.generic.timeSeries.base.new('Takeover elections / $__interval', targets=[ + signals.elections.catchUpTakeoverCalled.asTarget() + + g.query.prometheus.withInterval('2m'), + signals.elections.catchUpTakeoverSuccessful.asTarget() + + g.query.prometheus.withInterval('2m'), + ]) + + g.panel.timeSeries.options.legend.withPlacement('right') + + g.panel.timeSeries.options.legend.withAsTable(true) + + g.panel.timeSeries.panelOptions.withDescription('The number of elections called and elections won by the node when it was more current than the primary node.') + + g.panel.timeSeries.standardOptions.withUnit('none') + + g.panel.timeSeries.options.tooltip.withMode('multi') + + g.panel.timeSeries.options.tooltip.withSort('desc'), + + timeoutElections: + commonlib.panels.generic.timeSeries.base.new('Timeout elections / $__interval', targets=[ + signals.elections.electionTimeoutCalled.asTarget() + + g.query.prometheus.withInterval('2m'), + signals.elections.electionTimeoutSuccessful.asTarget() + + g.query.prometheus.withInterval('2m'), + ]) + + g.panel.timeSeries.options.legend.withPlacement('right') + + g.panel.timeSeries.options.legend.withAsTable(true) + + g.panel.timeSeries.panelOptions.withDescription('The number of elections called and elections won by the node when the time it took to reach the primary node exceeded the election timeout 
limit.') + + g.panel.timeSeries.standardOptions.withUnit('none') + + g.panel.timeSeries.options.tooltip.withMode('multi') + + g.panel.timeSeries.options.tooltip.withSort('desc'), + + catchUpsTotal: + commonlib.panels.generic.timeSeries.base.new('Catch-ups / $__interval', targets=[ + signals.elections.numCatchUps.asTarget() + + g.query.prometheus.withInterval('2m'), + ]) + + g.panel.timeSeries.panelOptions.withDescription('The number of times the node had to catch up to the highest known oplog entry.') + + g.panel.timeSeries.standardOptions.withUnit('none') + + g.panel.timeSeries.options.tooltip.withMode('multi') + + g.panel.timeSeries.options.tooltip.withSort('desc'), + + catchUpsSkipped: + commonlib.panels.generic.timeSeries.base.new('Catch-ups skipped / $__interval', targets=[ + signals.elections.numCatchUpsSkipped.asTarget() + + g.query.prometheus.withInterval('2m'), + ]) + + g.panel.timeSeries.panelOptions.withDescription('The number of times the node skipped the catch up process when it was the newly elected primary.') + + g.panel.timeSeries.standardOptions.withUnit('none') + + g.panel.timeSeries.options.tooltip.withMode('multi') + + g.panel.timeSeries.options.tooltip.withSort('desc'), + + catchUpsSucceeded: + commonlib.panels.generic.timeSeries.base.new('Catch-ups succeeded / $__interval', targets=[ + signals.elections.numCatchUpsSucceeded.asTarget() + + g.query.prometheus.withInterval('2m'), + ]) + + g.panel.timeSeries.panelOptions.withDescription('The number of times the node succeeded in catching up when it was the newly elected primary.') + + g.panel.timeSeries.standardOptions.withUnit('none') + + g.panel.timeSeries.options.tooltip.withMode('multi') + + g.panel.timeSeries.options.tooltip.withSort('desc'), + + catchUpsFailed: + commonlib.panels.generic.timeSeries.base.new('Catch-ups failed / $__interval', targets=[ + signals.elections.numCatchUpsFailedWithError.asTarget() + + g.query.prometheus.withInterval('2m'), + ]) + + 
g.panel.timeSeries.panelOptions.withDescription('The number of times the node failed in catching up when it was the newly elected primary.') + + g.panel.timeSeries.standardOptions.withUnit('none') + + g.panel.timeSeries.options.tooltip.withMode('multi') + + g.panel.timeSeries.options.tooltip.withSort('desc'), + + // Catch-up timeouts; tooltip options are set explicitly, like the other catch-up panels. + catchUpsTimedOut: + commonlib.panels.generic.timeSeries.base.new('Catch-up timeouts / $__interval', targets=[ + signals.elections.numCatchUpsTimedOut.asTarget() + + g.query.prometheus.withInterval('2m'), + ]) + + g.panel.timeSeries.panelOptions.withDescription('The number of times the node timed out during the catch-up process when it was the newly elected primary.') + + g.panel.timeSeries.standardOptions.withUnit('none') + + g.panel.timeSeries.options.tooltip.withMode('multi') + + g.panel.timeSeries.options.tooltip.withSort('desc'), + + averageCatchUpOps: + commonlib.panels.generic.timeSeries.base.new('Average catch-up operations', targets=[ + signals.elections.averageCatchUpOps.asTarget(), + ]) + + g.panel.timeSeries.panelOptions.withDescription('The average number of operations done during the catch-up process when this node is the newly elected primary.') + + g.panel.timeSeries.standardOptions.withUnit('none') + + g.panel.timeSeries.options.tooltip.withMode('multi') + + g.panel.timeSeries.options.tooltip.withSort('desc'), + + // + // Operations Overview dashboard panels + // + + // Section 1: Operation Counters (by type) - cluster-level aggregated + insertOperations: + commonlib.panels.generic.timeSeries.base.new('Insert operations', targets=[ + signals.operations.opCountersInsert.asTarget() + + g.query.prometheus.withInterval('2m'), + ]) + + g.panel.timeSeries.panelOptions.withDescription('The number of insert operations.') + + g.panel.timeSeries.standardOptions.withUnit('none') + + g.panel.timeSeries.options.tooltip.withMode('multi') + + g.panel.timeSeries.options.tooltip.withSort('desc'), + + queryOperations: + commonlib.panels.generic.timeSeries.base.new('Query operations', targets=[ + signals.operations.opCountersQuery.asTarget() + +
g.query.prometheus.withInterval('2m'), + ]) + + g.panel.timeSeries.panelOptions.withDescription('The number of query operations.') + + g.panel.timeSeries.standardOptions.withUnit('none') + + g.panel.timeSeries.options.tooltip.withMode('multi') + + g.panel.timeSeries.options.tooltip.withSort('desc'), + + updateOperations: + commonlib.panels.generic.timeSeries.base.new('Update operations', targets=[ + signals.operations.opCountersUpdate.asTarget() + + g.query.prometheus.withInterval('2m'), + ]) + + g.panel.timeSeries.panelOptions.withDescription('The number of update operations.') + + g.panel.timeSeries.standardOptions.withUnit('none') + + g.panel.timeSeries.options.tooltip.withMode('multi') + + g.panel.timeSeries.options.tooltip.withSort('desc'), + + deleteOperations: + commonlib.panels.generic.timeSeries.base.new('Delete operations', targets=[ + signals.operations.opCountersDelete.asTarget() + + g.query.prometheus.withInterval('2m'), + ]) + + g.panel.timeSeries.panelOptions.withDescription('The number of delete operations.') + + g.panel.timeSeries.standardOptions.withUnit('none') + + g.panel.timeSeries.options.tooltip.withMode('multi') + + g.panel.timeSeries.options.tooltip.withSort('desc'), + + // Section 2: Operation Counters (by instance) + insertOperationsByInstance: + commonlib.panels.generic.timeSeries.base.new('Insert operations', targets=[ + signals.operations.opCountersInsertByInstance.asTarget(), + ]) + + g.panel.timeSeries.panelOptions.withDescription('The rate of insert operations the node has received.') + + g.panel.timeSeries.standardOptions.withUnit('ops') + + g.panel.timeSeries.options.tooltip.withMode('multi') + + g.panel.timeSeries.options.tooltip.withSort('desc') + + + g.panel.timeSeries.fieldConfig.defaults.custom.stacking.withMode('normal'), + + queryOperationsByInstance: + commonlib.panels.generic.timeSeries.base.new('Query operations', targets=[ + signals.operations.opCountersQueryByInstance.asTarget(), + ]) + + 
g.panel.timeSeries.panelOptions.withDescription('The rate of query operations the node has received.') + + g.panel.timeSeries.standardOptions.withUnit('ops') + + g.panel.timeSeries.options.tooltip.withMode('multi') + + g.panel.timeSeries.options.tooltip.withSort('desc') + + + g.panel.timeSeries.fieldConfig.defaults.custom.stacking.withMode('normal'), + + // Per-instance update rate; targets the *ByInstance signal like the insert/query/delete panels in this section. + updateOperationsByInstance: + commonlib.panels.generic.timeSeries.base.new('Update operations', targets=[ + signals.operations.opCountersUpdateByInstance.asTarget(), + ]) + + g.panel.timeSeries.panelOptions.withDescription('The rate of update operations this node has received.') + + g.panel.timeSeries.standardOptions.withUnit('ops') + + g.panel.timeSeries.options.tooltip.withMode('multi') + + g.panel.timeSeries.options.tooltip.withSort('desc') + + + g.panel.timeSeries.fieldConfig.defaults.custom.stacking.withMode('normal'), + + deleteOperationsByInstance: + commonlib.panels.generic.timeSeries.base.new('Delete operations', targets=[ + signals.operations.opCountersDeleteByInstance.asTarget(), + ]) + + g.panel.timeSeries.panelOptions.withDescription('The rate of delete operations this node has received.') + + g.panel.timeSeries.standardOptions.withUnit('ops') + + g.panel.timeSeries.options.tooltip.withMode('multi') + + g.panel.timeSeries.options.tooltip.withSort('desc') + + + g.panel.timeSeries.fieldConfig.defaults.custom.stacking.withMode('normal'), + + // Section 3: Operation Latencies (cluster) + readOperationCount: + commonlib.panels.generic.timeSeries.base.new('Read operation count', targets=[ + signals.operations.opLatenciesReadsOps.asTarget(), + ]) + + g.panel.timeSeries.panelOptions.withDescription('The number of read operations.') + + g.panel.timeSeries.standardOptions.withUnit('ops') + + g.panel.timeSeries.options.tooltip.withMode('multi') + + g.panel.timeSeries.options.tooltip.withSort('desc'), + + writeOperationCount: + commonlib.panels.generic.timeSeries.base.new('Write operation count', targets=[ +
signals.operations.opLatenciesWritesOps.asTarget(), + ]) + + g.panel.timeSeries.panelOptions.withDescription('The number of write operations.') + + g.panel.timeSeries.standardOptions.withUnit('ops') + + g.panel.timeSeries.options.tooltip.withMode('multi') + + g.panel.timeSeries.options.tooltip.withSort('desc'), + + readOperationLatency: + commonlib.panels.generic.timeSeries.base.new('Read operation latency / $__interval', targets=[ + signals.operations.opLatenciesReadsLatency.asTarget() + + g.query.prometheus.withInterval('2m'), + ]) + + g.panel.timeSeries.panelOptions.withDescription('The latency time for read operations performed by this node.') + + g.panel.timeSeries.standardOptions.withUnit('µs') + + g.panel.timeSeries.options.tooltip.withMode('multi') + + g.panel.timeSeries.options.tooltip.withSort('desc'), + + writeOperationLatency: + commonlib.panels.generic.timeSeries.base.new('Write operation latency / $__interval', targets=[ + signals.operations.opLatenciesWritesLatency.asTarget() + + g.query.prometheus.withInterval('2m'), + ]) + + g.panel.timeSeries.panelOptions.withDescription('The latency time for write operations performed by this node.') + + g.panel.timeSeries.standardOptions.withUnit('µs') + + g.panel.timeSeries.options.tooltip.withMode('multi') + + g.panel.timeSeries.options.tooltip.withSort('desc'), + + // Section 4: Operation Latencies (by instance) + readOperationCountByInstance: + commonlib.panels.generic.timeSeries.base.new('Read operation count', targets=[ + signals.operations.opLatenciesReadsOpsByInstance.asTarget(), + ]) + + g.panel.timeSeries.panelOptions.withDescription('The number of read operations per instance.') + + g.panel.timeSeries.standardOptions.withUnit('ops') + + g.panel.timeSeries.options.tooltip.withMode('multi') + + g.panel.timeSeries.options.tooltip.withSort('desc') + + + g.panel.timeSeries.fieldConfig.defaults.custom.stacking.withMode('normal'), + + writeOperationCountByInstance: + 
commonlib.panels.generic.timeSeries.base.new('Write operation count', targets=[ + signals.operations.opLatenciesWritesOpsByInstance.asTarget(), + ]) + + g.panel.timeSeries.panelOptions.withDescription('The number of write operations per instance.') + + g.panel.timeSeries.standardOptions.withUnit('ops') + + g.panel.timeSeries.options.tooltip.withMode('multi') + + g.panel.timeSeries.options.tooltip.withSort('desc') + + + g.panel.timeSeries.fieldConfig.defaults.custom.stacking.withMode('normal'), + + readOperationLatencyByInstance: + commonlib.panels.generic.timeSeries.base.new('Read operation latency / $__interval', targets=[ + signals.operations.opLatenciesReadsLatencyByInstance.asTarget() + + g.query.prometheus.withInterval('2m'), + ]) + + g.panel.timeSeries.panelOptions.withDescription('The latency time for read operations performed per instance.') + + g.panel.timeSeries.standardOptions.withUnit('µs') + + g.panel.timeSeries.options.tooltip.withMode('multi') + + g.panel.timeSeries.options.tooltip.withSort('desc') + + + g.panel.timeSeries.fieldConfig.defaults.custom.stacking.withMode('normal'), + + writeOperationLatencyByInstance: + commonlib.panels.generic.timeSeries.base.new('Write operation latency / $__interval', targets=[ + signals.operations.opLatenciesWritesLatencyByInstance.asTarget() + + g.query.prometheus.withInterval('2m'), + ]) + + g.panel.timeSeries.panelOptions.withDescription('The latency time for write operations performed per instance.') + + g.panel.timeSeries.standardOptions.withUnit('µs') + + g.panel.timeSeries.options.tooltip.withMode('multi') + + g.panel.timeSeries.options.tooltip.withSort('desc') + + + g.panel.timeSeries.fieldConfig.defaults.custom.stacking.withMode('normal'), + + // Section 5: Average Latencies (calculated) + avgReadLatency: + commonlib.panels.generic.timeSeries.base.new('Average read latency / $__interval', targets=[ + signals.operations.avgReadLatency.asTarget() + + g.query.prometheus.withInterval('2m'), + ]) + + 
g.panel.timeSeries.panelOptions.withDescription('Average latency per read operation.') + + g.panel.timeSeries.standardOptions.withUnit('µs') + + g.panel.timeSeries.options.tooltip.withMode('multi') + + g.panel.timeSeries.options.tooltip.withSort('desc'), + + avgWriteLatency: + commonlib.panels.generic.timeSeries.base.new('Average write latency / $__interval', targets=[ + signals.operations.avgWriteLatency.asTarget() + + g.query.prometheus.withInterval('2m'), + ]) + + g.panel.timeSeries.panelOptions.withDescription('Average latency per write operation.') + + g.panel.timeSeries.standardOptions.withUnit('µs') + + g.panel.timeSeries.options.tooltip.withMode('multi') + + g.panel.timeSeries.options.tooltip.withSort('desc'), + + avgReadLatencyByInstance: + commonlib.panels.generic.timeSeries.base.new('Average read latency / $__interval', targets=[ + signals.operations.avgReadLatencyByInstance.asTarget() + + g.query.prometheus.withInterval('2m'), + ]) + + g.panel.timeSeries.panelOptions.withDescription('Average latency per read operation by instance.') + + g.panel.timeSeries.standardOptions.withUnit('µs') + + g.panel.timeSeries.options.tooltip.withMode('multi') + + g.panel.timeSeries.options.tooltip.withSort('desc'), + + avgWriteLatencyByInstance: + commonlib.panels.generic.timeSeries.base.new('Average write latency / $__interval', targets=[ + signals.operations.avgWriteLatencyByInstance.asTarget() + + g.query.prometheus.withInterval('2m'), + ]) + + g.panel.timeSeries.panelOptions.withDescription('Average latency per write operation by instance.') + + g.panel.timeSeries.standardOptions.withUnit('µs') + + g.panel.timeSeries.options.tooltip.withMode('multi') + + g.panel.timeSeries.options.tooltip.withSort('desc'), + + // Operations dashboard - connections + currentConnectionsOperations: + commonlib.panels.generic.timeSeries.base.new('Current connections', targets=[ + signals.operations.connectionsCurrent.asTarget(), + ]) + + 
g.panel.timeSeries.panelOptions.withDescription('The number of incoming connections from clients to the node.') + + g.panel.timeSeries.standardOptions.withUnit('none') + + g.panel.timeSeries.options.tooltip.withMode('multi') + + g.panel.timeSeries.options.tooltip.withSort('desc'), + + activeConnectionsOperations: + commonlib.panels.generic.timeSeries.base.new('Active connections', targets=[ + signals.operations.connectionsActive.asTarget(), + ]) + + g.panel.timeSeries.panelOptions.withDescription('The number of connections that currently have operations in progress.') + + g.panel.timeSeries.standardOptions.withUnit('none') + + g.panel.timeSeries.options.tooltip.withMode('multi') + + g.panel.timeSeries.options.tooltip.withSort('desc'), + + // Read/write operations (stacked) + readwriteOperationsOperations: + commonlib.panels.generic.timeSeries.base.new('Read and write operations', targets=[ + signals.operations.opLatenciesReadsOpsByInstanceStacked.asTarget(), + signals.operations.opLatenciesWritesOpsByInstanceStacked.asTarget(), + ]) + + g.panel.timeSeries.panelOptions.withDescription('The rate of read and write operations performed by the node.') + + g.panel.timeSeries.standardOptions.withUnit('ops') + + g.panel.timeSeries.options.tooltip.withMode('multi') + + g.panel.timeSeries.options.tooltip.withSort('desc') + + g.panel.timeSeries.fieldConfig.defaults.custom.stacking.withMode('normal'), + + readwriteLatencyOperations: + commonlib.panels.generic.timeSeries.base.new('Read and write latency / $__interval', targets=[ + signals.operations.opLatenciesReadsLatencyByInstanceStacked.asTarget() + + g.query.prometheus.withInterval('2m'), + signals.operations.opLatenciesWritesLatencyByInstanceStacked.asTarget() + + g.query.prometheus.withInterval('2m'), + ]) + + g.panel.timeSeries.panelOptions.withDescription('The latency time for read and write operations performed by this node.') + + g.panel.timeSeries.standardOptions.withUnit('µs') + + 
g.panel.timeSeries.options.tooltip.withMode('multi') + + g.panel.timeSeries.options.tooltip.withSort('desc') + + g.panel.timeSeries.fieldConfig.defaults.custom.stacking.withMode('normal'), + + // Operations dashboard - database locks + databaseDeadlocksOperations: + commonlib.panels.generic.timeSeries.base.new('Database deadlocks / $__interval', targets=[ + signals.operations.dbDeadlockExclusiveByInstance.asTarget() + + g.query.prometheus.withInterval('2m'), + signals.operations.dbDeadlockIntentExclusiveByInstance.asTarget() + + g.query.prometheus.withInterval('2m'), + signals.operations.dbDeadlockSharedByInstance.asTarget() + + g.query.prometheus.withInterval('2m'), + signals.operations.dbDeadlockIntentSharedByInstance.asTarget() + + g.query.prometheus.withInterval('2m'), + ]) + + g.panel.timeSeries.panelOptions.withDescription('The number of deadlocks that have occurred for the database lock.') + + g.panel.timeSeries.standardOptions.withUnit('none') + + g.panel.timeSeries.options.tooltip.withMode('multi') + + g.panel.timeSeries.options.tooltip.withSort('desc') + + g.panel.timeSeries.fieldConfig.defaults.custom.stacking.withMode('normal'), + + databaseWaitCountOperations: + commonlib.panels.generic.timeSeries.base.new('Database wait count / $__interval', targets=[ + signals.operations.dbWaitCountExclusiveByInstance.asTarget() + + g.query.prometheus.withInterval('2m'), + signals.operations.dbWaitCountIntentExclusiveByInstance.asTarget() + + g.query.prometheus.withInterval('2m'), + signals.operations.dbWaitCountSharedByInstance.asTarget() + + g.query.prometheus.withInterval('2m'), + signals.operations.dbWaitCountIntentSharedByInstance.asTarget() + + g.query.prometheus.withInterval('2m'), + ]) + + g.panel.timeSeries.panelOptions.withDescription('The number of database lock acquisitions that had to wait.') + + g.panel.timeSeries.standardOptions.withUnit('none') + + g.panel.timeSeries.options.tooltip.withMode('multi') + + 
g.panel.timeSeries.options.tooltip.withSort('desc') + + g.panel.timeSeries.fieldConfig.defaults.custom.stacking.withMode('normal'), + + databaseWaitTimeOperations: + commonlib.panels.generic.timeSeries.base.new('Database wait time / $__interval', targets=[ + signals.operations.dbAcqTimeExclusiveByInstance.asTarget() + + g.query.prometheus.withInterval('2m'), + signals.operations.dbAcqTimeIntentExclusiveByInstance.asTarget() + + g.query.prometheus.withInterval('2m'), + signals.operations.dbAcqTimeSharedByInstance.asTarget() + + g.query.prometheus.withInterval('2m'), + signals.operations.dbAcqTimeIntentSharedByInstance.asTarget() + + g.query.prometheus.withInterval('2m'), + ]) + + g.panel.timeSeries.panelOptions.withDescription('The time spent waiting for the database lock acquisition.') + + g.panel.timeSeries.standardOptions.withUnit('µs') + + g.panel.timeSeries.options.tooltip.withMode('multi') + + g.panel.timeSeries.options.tooltip.withSort('desc') + + g.panel.timeSeries.fieldConfig.defaults.custom.stacking.withMode('normal'), + + // Operations dashboard - collection locks + collectionDeadlocksOperations: + commonlib.panels.generic.timeSeries.base.new('Collection deadlocks / $__interval', targets=[ + signals.operations.collDeadlockExclusiveByInstance.asTarget() + + g.query.prometheus.withInterval('2m'), + signals.operations.collDeadlockIntentExclusiveByInstance.asTarget() + + g.query.prometheus.withInterval('2m'), + signals.operations.collDeadlockSharedByInstance.asTarget() + + g.query.prometheus.withInterval('2m'), + signals.operations.collDeadlockIntentSharedByInstance.asTarget() + + g.query.prometheus.withInterval('2m'), + ]) + + g.panel.timeSeries.panelOptions.withDescription('The number of deadlocks that have occurred for the collection lock.') + + g.panel.timeSeries.standardOptions.withUnit('none') + + g.panel.timeSeries.options.tooltip.withMode('multi') + + g.panel.timeSeries.options.tooltip.withSort('desc') + + 
g.panel.timeSeries.fieldConfig.defaults.custom.stacking.withMode('normal'), + + collectionWaitCountOperations: + commonlib.panels.generic.timeSeries.base.new('Collection wait count / $__interval', targets=[ + signals.operations.collWaitCountExclusiveByInstance.asTarget() + + g.query.prometheus.withInterval('2m'), + signals.operations.collWaitCountIntentExclusiveByInstance.asTarget() + + g.query.prometheus.withInterval('2m'), + signals.operations.collWaitCountSharedByInstance.asTarget() + + g.query.prometheus.withInterval('2m'), + signals.operations.collWaitCountIntentSharedByInstance.asTarget() + + g.query.prometheus.withInterval('2m'), + ]) + + g.panel.timeSeries.panelOptions.withDescription('The number of collection lock acquisitions that had to wait.') + + g.panel.timeSeries.standardOptions.withUnit('none') + + g.panel.timeSeries.options.tooltip.withMode('multi') + + g.panel.timeSeries.options.tooltip.withSort('desc') + + g.panel.timeSeries.fieldConfig.defaults.custom.stacking.withMode('normal'), + + collectionWaitTimeOperations: + commonlib.panels.generic.timeSeries.base.new('Collection wait time / $__interval', targets=[ + signals.operations.collAcqTimeExclusiveByInstance.asTarget() + + g.query.prometheus.withInterval('2m'), + signals.operations.collAcqTimeIntentExclusiveByInstance.asTarget() + + g.query.prometheus.withInterval('2m'), + signals.operations.collAcqTimeSharedByInstance.asTarget() + + g.query.prometheus.withInterval('2m'), + signals.operations.collAcqTimeIntentSharedByInstance.asTarget() + + g.query.prometheus.withInterval('2m'), + ]) + + g.panel.timeSeries.panelOptions.withDescription('The time spent waiting for the collection lock acquisition.') + + g.panel.timeSeries.standardOptions.withUnit('µs') + + g.panel.timeSeries.options.tooltip.withMode('multi') + + g.panel.timeSeries.options.tooltip.withSort('desc') + + g.panel.timeSeries.fieldConfig.defaults.custom.stacking.withMode('normal'), + + // + // Performance Overview dashboard panels + // + + 
// Memory and hardware panels + memoryPerformance: + commonlib.panels.generic.timeSeries.base.new('Memory', targets=[ + signals.performance.memoryResidentByInstance.asTarget(), + signals.performance.memoryVirtualByInstance.asTarget(), + ]) + + g.panel.timeSeries.panelOptions.withDescription('The amount of RAM and virtual memory being used by the database process.') + + g.panel.timeSeries.standardOptions.withUnit('mbytes') + + g.panel.timeSeries.options.tooltip.withMode('multi') + + g.panel.timeSeries.options.tooltip.withSort('desc'), + + hardwareCPUInterruptServiceTimePerformance: + commonlib.panels.generic.timeSeries.base.new('Hardware CPU interrupt service time / $__interval', targets=[ + signals.performance.cpuIrqTimeByInstance.asTarget() + + g.query.prometheus.withInterval('2m'), + ]) + + g.panel.timeSeries.panelOptions.withDescription('The amount of time spent servicing CPU interrupts.') + + g.panel.timeSeries.standardOptions.withUnit('ms') + + g.panel.timeSeries.options.tooltip.withMode('multi') + + g.panel.timeSeries.options.tooltip.withSort('desc'), + + diskSpacePerformance: + commonlib.panels.generic.timeSeries.base.new('Disk space', targets=[ + signals.performance.diskSpaceFree.asTarget(), + signals.performance.diskSpaceUsed.asTarget(), + ]) + + g.panel.timeSeries.panelOptions.withDescription("The amount of free and used disk space on this node's hardware.") + + g.panel.timeSeries.standardOptions.withUnit('decbytes') + + g.panel.timeSeries.options.tooltip.withMode('multi') + + g.panel.timeSeries.options.tooltip.withSort('desc'), + + diskSpaceUtilizationPerformance: + commonlib.panels.generic.timeSeries.base.new('Disk space utilization', targets=[ + signals.performance.diskSpaceUtilizationByInstance.asTarget(), + ]) + + g.panel.timeSeries.panelOptions.withDescription("The disk space utilization for this node's hardware.") + + g.panel.timeSeries.standardOptions.withUnit('percentunit') + + g.panel.timeSeries.standardOptions.withMin(0) + + 
g.panel.timeSeries.standardOptions.withMax(1) + + g.panel.timeSeries.options.tooltip.withMode('multi') + + g.panel.timeSeries.options.tooltip.withSort('desc'), + + networkRequestsPerformance: + commonlib.panels.generic.timeSeries.base.new('Network requests', targets=[ + signals.performance.networkRequestsByInstance.asTarget(), + ]) + + g.panel.timeSeries.panelOptions.withDescription('The rate of distinct requests the node has received.') + + g.panel.timeSeries.standardOptions.withUnit('reqps') + + g.panel.timeSeries.options.tooltip.withMode('multi') + + g.panel.timeSeries.options.tooltip.withSort('desc'), + + slowNetworkRequestsPerformance: + commonlib.panels.generic.timeSeries.base.new('Slow network requests', targets=[ + signals.performance.networkSlowDNSByInstance.asTarget(), + signals.performance.networkSlowSSLByInstance.asTarget(), + ]) + + g.panel.timeSeries.panelOptions.withDescription('The rate of slow DNS and SSL operations received by this node.') + + g.panel.timeSeries.standardOptions.withUnit('reqps') + + g.panel.timeSeries.options.tooltip.withMode('multi') + + g.panel.timeSeries.options.tooltip.withSort('desc'), + + networkThroughputPerformance: + commonlib.panels.generic.timeSeries.base.new('Network throughput', targets=[ + signals.performance.networkBytesInByInstance.asTarget(), + signals.performance.networkBytesOutByInstance.asTarget(), + ]) + + g.panel.timeSeries.panelOptions.withDescription('The rate of bytes sent and received by the node over a network connection.') + + g.panel.timeSeries.standardOptions.withUnit('Bps') + + g.panel.timeSeries.options.tooltip.withMode('multi') + + g.panel.timeSeries.options.tooltip.withSort('desc') + + g.panel.timeSeries.fieldConfig.defaults.custom.stacking.withMode('normal'), + + hardwareIOPerformance: + commonlib.panels.generic.timeSeries.base.new('Hardware I/O', targets=[ + signals.performance.diskReadCountByInstance.asTarget(), + signals.performance.diskWriteCountByInstance.asTarget(), + ]) + + 
g.panel.timeSeries.panelOptions.withDescription("The rate of read and write I/O's processed by this node.") + + g.panel.timeSeries.standardOptions.withUnit('iops') + + g.panel.timeSeries.options.tooltip.withMode('multi') + + g.panel.timeSeries.options.tooltip.withSort('desc') + + g.panel.timeSeries.fieldConfig.defaults.custom.stacking.withMode('normal'), + + hardwareIOWaitTimePerformance: + commonlib.panels.generic.timeSeries.base.new('Hardware I/O wait time / $__interval', targets=[ + signals.performance.diskReadTimeByInstance.asTarget() + + g.query.prometheus.withInterval('2m'), + signals.performance.diskWriteTimeByInstance.asTarget() + + g.query.prometheus.withInterval('2m'), + ]) + + g.panel.timeSeries.panelOptions.withDescription("The amount of time the node has spent waiting for read and write I/O's to process.") + + g.panel.timeSeries.standardOptions.withUnit('ms') + + g.panel.timeSeries.options.tooltip.withMode('multi') + + g.panel.timeSeries.options.tooltip.withSort('desc') + + g.panel.timeSeries.fieldConfig.defaults.custom.stacking.withMode('normal'), + + // Section 1: Connection Metrics + currentConnections: + commonlib.panels.generic.timeSeries.base.new('Current connections', targets=[ + signals.performance.connectionsCurrent.asTarget(), + ]) + + g.panel.timeSeries.panelOptions.withDescription('The current number of active connections.') + + g.panel.timeSeries.options.tooltip.withMode('multi') + + g.panel.timeSeries.options.tooltip.withSort('desc'), + + activeConnections: + commonlib.panels.generic.timeSeries.base.new('Active connections', targets=[ + signals.performance.connectionsActive.asTarget(), + ]) + + g.panel.timeSeries.panelOptions.withDescription('The current number of connections with operations in progress.') + + g.panel.timeSeries.options.tooltip.withMode('multi') + + g.panel.timeSeries.options.tooltip.withSort('desc'), + + // Section 2: Database Lock Deadlocks (Cluster) + dbLockDeadlocksExclusive: + 
commonlib.panels.generic.timeSeries.base.new('Database exclusive lock deadlocks / $__interval', targets=[ + signals.performance.dbDeadlockExclusive.asTarget() + + g.query.prometheus.withInterval('2m'), + ]) + + g.panel.timeSeries.panelOptions.withDescription('The number of database exclusive lock deadlocks.') + + g.panel.timeSeries.options.tooltip.withMode('multi') + + g.panel.timeSeries.options.tooltip.withSort('desc'), + + dbLockDeadlocksIntentExclusive: + commonlib.panels.generic.timeSeries.base.new('Database intent-exclusive lock deadlocks / $__interval', targets=[ + signals.performance.dbDeadlockIntentExclusive.asTarget() + + g.query.prometheus.withInterval('2m'), + ]) + + g.panel.timeSeries.panelOptions.withDescription('The number of database intent-exclusive lock deadlocks.') + + g.panel.timeSeries.options.tooltip.withMode('multi') + + g.panel.timeSeries.options.tooltip.withSort('desc'), + + dbLockDeadlocksShared: + commonlib.panels.generic.timeSeries.base.new('Database shared lock deadlocks / $__interval', targets=[ + signals.performance.dbDeadlockShared.asTarget() + + g.query.prometheus.withInterval('2m'), + ]) + + g.panel.timeSeries.panelOptions.withDescription('The number of database shared lock deadlocks.') + + g.panel.timeSeries.options.tooltip.withMode('multi') + + g.panel.timeSeries.options.tooltip.withSort('desc'), + + dbLockDeadlocksIntentShared: + commonlib.panels.generic.timeSeries.base.new('Database intent-shared lock deadlocks / $__interval', targets=[ + signals.performance.dbDeadlockIntentShared.asTarget() + + g.query.prometheus.withInterval('2m'), + ]) + + g.panel.timeSeries.panelOptions.withDescription('The number of database intent-shared lock deadlocks.') + + g.panel.timeSeries.options.tooltip.withMode('multi') + + g.panel.timeSeries.options.tooltip.withSort('desc'), + + // Section 3: Database Lock Deadlocks (By Instance) + dbLockDeadlocksExclusiveByInstance: + commonlib.panels.generic.timeSeries.base.new('Database exclusive lock deadlocks 
/ $__interval', targets=[ + signals.performance.dbDeadlockExclusiveByInstance.asTarget() + + g.query.prometheus.withInterval('2m'), + ]) + + g.panel.timeSeries.panelOptions.withDescription('The number of database exclusive lock deadlocks per instance.') + + g.panel.timeSeries.options.tooltip.withMode('multi') + + g.panel.timeSeries.options.tooltip.withSort('desc') + + g.panel.timeSeries.fieldConfig.defaults.custom.stacking.withMode('normal'), + + dbLockDeadlocksIntentExclusiveByInstance: + commonlib.panels.generic.timeSeries.base.new('Database intent-exclusive lock deadlocks / $__interval', targets=[ + signals.performance.dbDeadlockIntentExclusiveByInstance.asTarget() + + g.query.prometheus.withInterval('2m'), + ]) + + g.panel.timeSeries.panelOptions.withDescription('The number of database intent-exclusive lock deadlocks per instance.') + + g.panel.timeSeries.options.tooltip.withMode('multi') + + g.panel.timeSeries.options.tooltip.withSort('desc') + + g.panel.timeSeries.fieldConfig.defaults.custom.stacking.withMode('normal'), + + dbLockDeadlocksSharedByInstance: + commonlib.panels.generic.timeSeries.base.new('Database shared lock deadlocks / $__interval', targets=[ + signals.performance.dbDeadlockSharedByInstance.asTarget() + + g.query.prometheus.withInterval('2m'), + ]) + + g.panel.timeSeries.panelOptions.withDescription('The number of database shared lock deadlocks per instance.') + + g.panel.timeSeries.options.tooltip.withMode('multi') + + g.panel.timeSeries.options.tooltip.withSort('desc') + + g.panel.timeSeries.fieldConfig.defaults.custom.stacking.withMode('normal'), + + dbLockDeadlocksIntentSharedByInstance: + commonlib.panels.generic.timeSeries.base.new('Database intent-shared lock deadlocks / $__interval', targets=[ + signals.performance.dbDeadlockIntentSharedByInstance.asTarget() + + g.query.prometheus.withInterval('2m'), + ]) + + g.panel.timeSeries.panelOptions.withDescription('The number of database intent-shared lock deadlocks per instance.') + + 
g.panel.timeSeries.options.tooltip.withMode('multi') + + g.panel.timeSeries.options.tooltip.withSort('desc') + + g.panel.timeSeries.fieldConfig.defaults.custom.stacking.withMode('normal'), + + // Section 4: Database Lock Wait Counts (Cluster) + dbLockWaitCountExclusive: + commonlib.panels.generic.timeSeries.base.new('Database exclusive lock wait count / $__interval', targets=[ + signals.performance.dbWaitCountExclusive.asTarget() + + g.query.prometheus.withInterval('2m'), + ]) + + g.panel.timeSeries.panelOptions.withDescription('The number of times lock acquisitions encounter waits for database exclusive locks.') + + g.panel.timeSeries.options.tooltip.withMode('multi') + + g.panel.timeSeries.options.tooltip.withSort('desc'), + + dbLockWaitCountIntentExclusive: + commonlib.panels.generic.timeSeries.base.new('Database intent-exclusive lock wait count / $__interval', targets=[ + signals.performance.dbWaitCountIntentExclusive.asTarget() + + g.query.prometheus.withInterval('2m'), + ]) + + g.panel.timeSeries.panelOptions.withDescription('The number of times lock acquisitions encounter waits for database intent-exclusive locks.') + + g.panel.timeSeries.options.tooltip.withMode('multi') + + g.panel.timeSeries.options.tooltip.withSort('desc'), + + dbLockWaitCountShared: + commonlib.panels.generic.timeSeries.base.new('Database shared lock wait count / $__interval', targets=[ + signals.performance.dbWaitCountShared.asTarget() + + g.query.prometheus.withInterval('2m'), + ]) + + g.panel.timeSeries.panelOptions.withDescription('The number of times lock acquisitions encounter waits for database shared locks.') + + g.panel.timeSeries.options.tooltip.withMode('multi') + + g.panel.timeSeries.options.tooltip.withSort('desc'), + + dbLockWaitCountIntentShared: + commonlib.panels.generic.timeSeries.base.new('Database intent-shared lock wait count / $__interval', targets=[ + signals.performance.dbWaitCountIntentShared.asTarget() + + g.query.prometheus.withInterval('2m'), + ]) + + 
g.panel.timeSeries.panelOptions.withDescription('The number of times lock acquisitions encounter waits for database intent-shared locks.') + + g.panel.timeSeries.options.tooltip.withMode('multi') + + g.panel.timeSeries.options.tooltip.withSort('desc'), + + // Section 5: Database Lock Wait Counts (By Instance) + dbLockWaitCountExclusiveByInstance: + commonlib.panels.generic.timeSeries.base.new('Database exclusive lock wait count / $__interval', targets=[ + signals.performance.dbWaitCountExclusiveByInstance.asTarget() + + g.query.prometheus.withInterval('2m'), + ]) + + g.panel.timeSeries.panelOptions.withDescription('The number of times lock acquisitions encounter waits for database exclusive locks per instance.') + + g.panel.timeSeries.options.tooltip.withMode('multi') + + g.panel.timeSeries.options.tooltip.withSort('desc') + + g.panel.timeSeries.fieldConfig.defaults.custom.stacking.withMode('normal'), + + dbLockWaitCountIntentExclusiveByInstance: + commonlib.panels.generic.timeSeries.base.new('Database intent-exclusive lock wait count / $__interval', targets=[ + signals.performance.dbWaitCountIntentExclusiveByInstance.asTarget() + + g.query.prometheus.withInterval('2m'), + ]) + + g.panel.timeSeries.panelOptions.withDescription('The number of times lock acquisitions encounter waits for database intent-exclusive locks per instance.') + + g.panel.timeSeries.options.tooltip.withMode('multi') + + g.panel.timeSeries.options.tooltip.withSort('desc') + + + g.panel.timeSeries.fieldConfig.defaults.custom.stacking.withMode('normal'), + + dbLockWaitCountSharedByInstance: + commonlib.panels.generic.timeSeries.base.new('Database shared lock wait count / $__interval', targets=[ + signals.performance.dbWaitCountSharedByInstance.asTarget() + + g.query.prometheus.withInterval('2m'), + ]) + + g.panel.timeSeries.panelOptions.withDescription('The number of times lock acquisitions encounter waits for database shared locks per instance.') + + 
g.panel.timeSeries.options.tooltip.withMode('multi') + + g.panel.timeSeries.options.tooltip.withSort('desc') + + g.panel.timeSeries.fieldConfig.defaults.custom.stacking.withMode('normal'), + + dbLockWaitCountIntentSharedByInstance: + commonlib.panels.generic.timeSeries.base.new('Database intent-shared lock wait count / $__interval', targets=[ + signals.performance.dbWaitCountIntentSharedByInstance.asTarget() + + g.query.prometheus.withInterval('2m'), + ]) + + g.panel.timeSeries.panelOptions.withDescription('The number of times lock acquisitions encounter waits for database intent-shared locks per instance.') + + g.panel.timeSeries.options.tooltip.withMode('multi') + + g.panel.timeSeries.options.tooltip.withSort('desc') + + + g.panel.timeSeries.fieldConfig.defaults.custom.stacking.withMode('normal'), + + // Section 6: Database Lock Acquisition Time + dbLockAcqTimeExclusive: + commonlib.panels.generic.timeSeries.base.new('Database exclusive lock acquisition time / $__interval', targets=[ + signals.performance.dbAcqTimeExclusive.asTarget() + + g.query.prometheus.withInterval('2m'), + ]) + + g.panel.timeSeries.panelOptions.withDescription('The time spent acquiring database exclusive locks.') + + g.panel.timeSeries.standardOptions.withUnit('µs') + + g.panel.timeSeries.options.tooltip.withMode('multi') + + g.panel.timeSeries.options.tooltip.withSort('desc') + + + g.panel.timeSeries.fieldConfig.defaults.custom.stacking.withMode('normal'), + + dbLockAcqTimeIntentExclusive: + commonlib.panels.generic.timeSeries.base.new('Database intent-exclusive lock acquisition time / $__interval', targets=[ + signals.performance.dbAcqTimeIntentExclusive.asTarget() + + g.query.prometheus.withInterval('2m'), + ]) + + g.panel.timeSeries.panelOptions.withDescription('The time spent acquiring database intent-exclusive locks.') + + g.panel.timeSeries.standardOptions.withUnit('µs') + + g.panel.timeSeries.options.tooltip.withMode('multi') + + g.panel.timeSeries.options.tooltip.withSort('desc') + 
+ g.panel.timeSeries.fieldConfig.defaults.custom.stacking.withMode('normal'), + + dbLockAcqTimeShared: + commonlib.panels.generic.timeSeries.base.new('Database shared lock acquisition time / $__interval', targets=[ + signals.performance.dbAcqTimeShared.asTarget() + + g.query.prometheus.withInterval('2m'), + ]) + + g.panel.timeSeries.panelOptions.withDescription('The time spent acquiring database shared locks.') + + g.panel.timeSeries.standardOptions.withUnit('µs') + + g.panel.timeSeries.options.tooltip.withMode('multi') + + g.panel.timeSeries.options.tooltip.withSort('desc') + + g.panel.timeSeries.fieldConfig.defaults.custom.stacking.withMode('normal'), + + dbLockAcqTimeIntentShared: + commonlib.panels.generic.timeSeries.base.new('Database intent-shared lock acquisition time / $__interval', targets=[ + signals.performance.dbAcqTimeIntentShared.asTarget() + + g.query.prometheus.withInterval('2m'), + ]) + + g.panel.timeSeries.panelOptions.withDescription('The time spent acquiring database intent-shared locks.') + + g.panel.timeSeries.standardOptions.withUnit('µs') + + g.panel.timeSeries.options.tooltip.withMode('multi') + + g.panel.timeSeries.options.tooltip.withSort('desc') + + + g.panel.timeSeries.fieldConfig.defaults.custom.stacking.withMode('normal'), + + // Section 7: Collection Lock Deadlocks + collLockDeadlocksExclusive: + commonlib.panels.generic.timeSeries.base.new('Collection exclusive lock deadlocks / $__interval', targets=[ + signals.performance.collDeadlockExclusive.asTarget() + + g.query.prometheus.withInterval('2m'), + ]) + + g.panel.timeSeries.panelOptions.withDescription('The number of collection exclusive lock deadlocks.') + + g.panel.timeSeries.options.tooltip.withMode('multi') + + g.panel.timeSeries.options.tooltip.withSort('desc') + + g.panel.timeSeries.fieldConfig.defaults.custom.stacking.withMode('normal'), + + collLockDeadlocksIntentExclusive: + commonlib.panels.generic.timeSeries.base.new('Collection intent-exclusive lock deadlocks / 
$__interval', targets=[ + signals.performance.collDeadlockIntentExclusive.asTarget() + + g.query.prometheus.withInterval('2m'), + ]) + + g.panel.timeSeries.panelOptions.withDescription('The number of collection intent-exclusive lock deadlocks.') + + g.panel.timeSeries.options.tooltip.withMode('multi') + + g.panel.timeSeries.options.tooltip.withSort('desc') + + g.panel.timeSeries.fieldConfig.defaults.custom.stacking.withMode('normal'), + + collLockDeadlocksShared: + commonlib.panels.generic.timeSeries.base.new('Collection shared lock deadlocks / $__interval', targets=[ + signals.performance.collDeadlockShared.asTarget() + + g.query.prometheus.withInterval('2m'), + ]) + + g.panel.timeSeries.panelOptions.withDescription('The number of collection shared lock deadlocks.') + + g.panel.timeSeries.options.tooltip.withMode('multi') + + g.panel.timeSeries.options.tooltip.withSort('desc') + + g.panel.timeSeries.fieldConfig.defaults.custom.stacking.withMode('normal'), + + collLockDeadlocksIntentShared: + commonlib.panels.generic.timeSeries.base.new('Collection intent-shared lock deadlocks / $__interval', targets=[ + signals.performance.collDeadlockIntentShared.asTarget() + + g.query.prometheus.withInterval('2m'), + ]) + + g.panel.timeSeries.panelOptions.withDescription('The number of collection intent-shared lock deadlocks.') + + g.panel.timeSeries.options.tooltip.withMode('multi') + + g.panel.timeSeries.options.tooltip.withSort('desc') + + g.panel.timeSeries.fieldConfig.defaults.custom.stacking.withMode('normal'), + + // Section 8: Collection Lock Wait Counts + collLockWaitCountExclusive: + commonlib.panels.generic.timeSeries.base.new('Collection exclusive lock wait count / $__interval', targets=[ + signals.performance.collWaitCountExclusive.asTarget() + + g.query.prometheus.withInterval('2m'), + ]) + + g.panel.timeSeries.panelOptions.withDescription('The number of times lock acquisitions encounter waits for collection exclusive locks.') + + 
g.panel.timeSeries.options.tooltip.withMode('multi') + + g.panel.timeSeries.options.tooltip.withSort('desc') + + g.panel.timeSeries.fieldConfig.defaults.custom.stacking.withMode('normal'), + + collLockWaitCountIntentExclusive: + commonlib.panels.generic.timeSeries.base.new('Collection intent-exclusive lock wait count / $__interval', targets=[ + signals.performance.collWaitCountIntentExclusive.asTarget() + + g.query.prometheus.withInterval('2m'), + ]) + + g.panel.timeSeries.panelOptions.withDescription('The number of times lock acquisitions encounter waits for collection intent-exclusive locks.') + + g.panel.timeSeries.options.tooltip.withMode('multi') + + g.panel.timeSeries.options.tooltip.withSort('desc') + + + g.panel.timeSeries.fieldConfig.defaults.custom.stacking.withMode('normal'), + + collLockWaitCountShared: + commonlib.panels.generic.timeSeries.base.new('Collection shared lock wait count / $__interval', targets=[ + signals.performance.collWaitCountShared.asTarget() + + g.query.prometheus.withInterval('2m'), + ]) + + g.panel.timeSeries.panelOptions.withDescription('The number of times lock acquisitions encounter waits for collection shared locks.') + + g.panel.timeSeries.options.tooltip.withMode('multi') + + g.panel.timeSeries.options.tooltip.withSort('desc') + + + g.panel.timeSeries.fieldConfig.defaults.custom.stacking.withMode('normal'), + + collLockWaitCountIntentShared: + commonlib.panels.generic.timeSeries.base.new('Collection intent-shared lock wait count / $__interval', targets=[ + signals.performance.collWaitCountIntentShared.asTarget() + + g.query.prometheus.withInterval('2m'), + ]) + + g.panel.timeSeries.panelOptions.withDescription('The number of times lock acquisitions encounter waits for collection intent-shared locks.') + + g.panel.timeSeries.options.tooltip.withMode('multi') + + g.panel.timeSeries.options.tooltip.withSort('desc') + + + g.panel.timeSeries.fieldConfig.defaults.custom.stacking.withMode('normal'), + + // Section 9: Collection Lock 
Acquisition Time + collLockAcqTimeExclusive: + commonlib.panels.generic.timeSeries.base.new('Collection exclusive lock acquisition time / $__interval', targets=[ + signals.performance.collAcqTimeExclusive.asTarget() + + g.query.prometheus.withInterval('2m'), + ]) + + g.panel.timeSeries.panelOptions.withDescription('The time spent acquiring collection exclusive locks.') + + g.panel.timeSeries.standardOptions.withUnit('µs') + + g.panel.timeSeries.options.tooltip.withMode('multi') + + g.panel.timeSeries.options.tooltip.withSort('desc') + + + g.panel.timeSeries.fieldConfig.defaults.custom.stacking.withMode('normal'), + + collLockAcqTimeIntentExclusive: + commonlib.panels.generic.timeSeries.base.new('Collection intent-exclusive lock acquisition time / $__interval', targets=[ + signals.performance.collAcqTimeIntentExclusive.asTarget() + + g.query.prometheus.withInterval('2m'), + ]) + + g.panel.timeSeries.panelOptions.withDescription('The time spent acquiring collection intent-exclusive locks.') + + g.panel.timeSeries.standardOptions.withUnit('µs') + + g.panel.timeSeries.options.tooltip.withMode('multi') + + g.panel.timeSeries.options.tooltip.withSort('desc') + + + g.panel.timeSeries.fieldConfig.defaults.custom.stacking.withMode('normal'), + + collLockAcqTimeShared: + commonlib.panels.generic.timeSeries.base.new('Collection shared lock acquisition time / $__interval', targets=[ + signals.performance.collAcqTimeShared.asTarget() + + g.query.prometheus.withInterval('2m'), + ]) + + g.panel.timeSeries.panelOptions.withDescription('The time spent acquiring collection shared locks.') + + g.panel.timeSeries.standardOptions.withUnit('µs') + + g.panel.timeSeries.options.tooltip.withMode('multi') + + g.panel.timeSeries.options.tooltip.withSort('desc') + + + g.panel.timeSeries.fieldConfig.defaults.custom.stacking.withMode('normal'), + + collLockAcqTimeIntentShared: + commonlib.panels.generic.timeSeries.base.new('Collection intent-shared lock acquisition time / $__interval', 
targets=[ + signals.performance.collAcqTimeIntentShared.asTarget() + + g.query.prometheus.withInterval('2m'), + ]) + + g.panel.timeSeries.panelOptions.withDescription('The time spent acquiring collection intent-shared locks.') + + g.panel.timeSeries.standardOptions.withUnit('µs') + + g.panel.timeSeries.options.tooltip.withMode('multi') + + g.panel.timeSeries.options.tooltip.withSort('desc') + + + g.panel.timeSeries.fieldConfig.defaults.custom.stacking.withMode('normal'), + + // + // Sharding Overview dashboard panels + // + + // General sharding statistics + staleConfigErrors: + commonlib.panels.generic.timeSeries.base.new('Stale configs / $__interval', targets=[ + signals.sharding.staleConfigErrors.asTarget() + + g.query.prometheus.withInterval('2m'), + ]) + + g.panel.timeSeries.panelOptions.withDescription('Number of times that a thread hit a stale config exception and triggered a metadata refresh.') + + g.panel.timeSeries.standardOptions.withUnit('none') + + g.panel.timeSeries.options.tooltip.withMode('multi') + + g.panel.timeSeries.options.tooltip.withSort('desc'), + + chunkMigrations: + commonlib.panels.generic.timeSeries.base.new('Chunk migrations / $__interval', targets=[ + signals.sharding.moveChunksStarted.asTarget() + + g.query.prometheus.withInterval('2m'), + ]) + + g.panel.timeSeries.panelOptions.withDescription('Chunk migration frequency for this node.') + + g.panel.timeSeries.standardOptions.withUnit('none') + + g.panel.timeSeries.options.tooltip.withMode('multi') + + g.panel.timeSeries.options.tooltip.withSort('desc'), + + docsCloned: + commonlib.panels.generic.timeSeries.base.new('Docs cloned / $__interval', targets=[ + signals.sharding.docsClonedDonor.asTarget() + + g.query.prometheus.withInterval('2m'), + signals.sharding.docsClonedRecipient.asTarget() + + g.query.prometheus.withInterval('2m'), + ]) + + g.panel.timeSeries.panelOptions.withDescription('The number of documents cloned on this node when it acted as primary for the donor and acted as 
primary for the recipient.') + + g.panel.timeSeries.standardOptions.withUnit('none') + + g.panel.timeSeries.options.tooltip.withMode('multi') + + g.panel.timeSeries.options.tooltip.withSort('desc') + + g.panel.timeSeries.fieldConfig.defaults.custom.stacking.withMode('normal'), + + criticalSectionTime: + commonlib.panels.generic.timeSeries.base.new('Critical section time / $__interval', targets=[ + signals.sharding.criticalSectionTime.asTarget() + + g.query.prometheus.withInterval('2m'), + ]) + + g.panel.timeSeries.panelOptions.withDescription('The time taken by the catch-up and update metadata phases of a range migration, by this node.') + + g.panel.timeSeries.standardOptions.withUnit('ms') + + g.panel.timeSeries.options.tooltip.withMode('multi') + + g.panel.timeSeries.options.tooltip.withSort('desc'), + + // Catalog cache panels + catalogCacheRefreshesStarted: + commonlib.panels.generic.timeSeries.base.new('Refreshes started / $__interval', targets=[ + signals.sharding.catalogCacheIncrementalRefreshes.asTarget() + + g.query.prometheus.withInterval('2m'), + signals.sharding.catalogCacheFullRefreshes.asTarget() + + g.query.prometheus.withInterval('2m'), + ]) + + g.panel.timeSeries.panelOptions.withDescription('The number of incremental and full refreshes that have started.') + + g.panel.timeSeries.standardOptions.withUnit('none') + + g.panel.timeSeries.options.tooltip.withMode('multi') + + g.panel.timeSeries.options.tooltip.withSort('desc') + + g.panel.timeSeries.fieldConfig.defaults.custom.stacking.withMode('normal'), + + catalogCacheRefreshesFailed: + commonlib.panels.generic.timeSeries.base.new('Refreshes failed / $__interval', targets=[ + signals.sharding.catalogCacheFailedRefreshes.asTarget() + + g.query.prometheus.withInterval('2m'), + ]) + + g.panel.timeSeries.panelOptions.withDescription('The number of full and incremental refreshes that have failed.') + + g.panel.timeSeries.standardOptions.withUnit('none') + + 
g.panel.timeSeries.options.tooltip.withMode('multi') + + g.panel.timeSeries.options.tooltip.withSort('desc'), + + catalogCacheStaleConfigs: + commonlib.panels.generic.timeSeries.base.new('Cache stale configs / $__interval', targets=[ + signals.sharding.catalogCacheStaleConfigErrors.asTarget() + + g.query.prometheus.withInterval('2m'), + ]) + + g.panel.timeSeries.panelOptions.withDescription('The number of times that a thread hit a stale config exception for the catalog cache and triggered a metadata refresh.') + + g.panel.timeSeries.standardOptions.withUnit('none') + + g.panel.timeSeries.options.tooltip.withMode('multi') + + g.panel.timeSeries.options.tooltip.withSort('desc'), + + catalogCacheEntries: + commonlib.panels.generic.timeSeries.base.new('Cache entries / $__interval', targets=[ + signals.sharding.catalogCacheDatabaseEntries.asTarget() + + g.query.prometheus.withInterval('2m'), + signals.sharding.catalogCacheCollectionEntries.asTarget() + + g.query.prometheus.withInterval('2m'), + ]) + + g.panel.timeSeries.panelOptions.withDescription('The number of database and collection entries that are currently in the catalog cache.') + + g.panel.timeSeries.standardOptions.withUnit('none') + + g.panel.timeSeries.options.tooltip.withMode('multi') + + g.panel.timeSeries.options.tooltip.withSort('desc'), + + catalogCacheRefreshTime: + commonlib.panels.generic.timeSeries.base.new('Cache refresh time / $__interval', targets=[ + signals.sharding.catalogCacheRefreshWaitTime.asTarget() + + g.query.prometheus.withInterval('2m'), + ]) + + g.panel.timeSeries.panelOptions.withDescription('The amount of time that threads had to wait for a refresh of the metadata.') + + g.panel.timeSeries.standardOptions.withUnit('µs') + + g.panel.timeSeries.options.tooltip.withMode('multi') + + g.panel.timeSeries.options.tooltip.withSort('desc'), + + catalogCacheOperationsBlocked: + commonlib.panels.generic.timeSeries.base.new('Cache operations blocked', targets=[ + 
signals.sharding.catalogCacheOpsBlocked.asTarget(), + ]) + + g.panel.timeSeries.panelOptions.withDescription('The rate of operations that are blocked by a refresh of the catalog cache. Specific to mongos nodes found under replica set "none".') + + g.panel.timeSeries.standardOptions.withUnit('ops') + + g.panel.timeSeries.options.tooltip.withMode('multi') + + g.panel.timeSeries.options.tooltip.withSort('desc'), + + // Shard targeting operations panels + shardTargetingAllShards: + commonlib.panels.generic.timeSeries.base.new('All shards', targets=[ + signals.sharding.targetingFindAllShards.asTarget(), + signals.sharding.targetingInsertAllShards.asTarget(), + signals.sharding.targetingUpdateAllShards.asTarget(), + signals.sharding.targetingDeleteAllShards.asTarget(), + signals.sharding.targetingAggregateAllShards.asTarget(), + ]) + + g.panel.timeSeries.panelOptions.withDescription('The rate of CRUD operations and aggregation commands run that targeted all shards. Specific to mongos nodes found under replica set "none".') + + g.panel.timeSeries.standardOptions.withUnit('ops') + + g.panel.timeSeries.options.legend.withPlacement('right') + + g.panel.timeSeries.options.tooltip.withMode('multi') + + g.panel.timeSeries.options.tooltip.withSort('desc'), + + shardTargetingManyShards: + commonlib.panels.generic.timeSeries.base.new('Many shards', targets=[ + signals.sharding.targetingFindManyShards.asTarget(), + signals.sharding.targetingInsertManyShards.asTarget(), + signals.sharding.targetingUpdateManyShards.asTarget(), + signals.sharding.targetingDeleteManyShards.asTarget(), + signals.sharding.targetingAggregateManyShards.asTarget(), + ]) + + g.panel.timeSeries.panelOptions.withDescription('The rate of CRUD operations and aggregation commands run that targeted more than 1 shard. 
Specific to mongos nodes found under replica set "none".') + + g.panel.timeSeries.standardOptions.withUnit('ops') + + g.panel.timeSeries.options.legend.withPlacement('right') + + g.panel.timeSeries.options.tooltip.withMode('multi') + + g.panel.timeSeries.options.tooltip.withSort('desc'), + + shardTargetingOneShard: + commonlib.panels.generic.timeSeries.base.new('One shard', targets=[ + signals.sharding.targetingFindOneShard.asTarget(), + signals.sharding.targetingInsertOneShard.asTarget(), + signals.sharding.targetingUpdateOneShard.asTarget(), + signals.sharding.targetingDeleteOneShard.asTarget(), + signals.sharding.targetingAggregateOneShard.asTarget(), + ]) + + g.panel.timeSeries.panelOptions.withDescription('The rate of CRUD operations and aggregation commands run that targeted 1 shard. Specific to mongos nodes found under replica set "none".') + + g.panel.timeSeries.standardOptions.withUnit('ops') + + g.panel.timeSeries.options.legend.withPlacement('right') + + g.panel.timeSeries.options.tooltip.withMode('multi') + + g.panel.timeSeries.options.tooltip.withSort('desc'), + + shardTargetingUnsharded: + commonlib.panels.generic.timeSeries.base.new('Unsharded', targets=[ + signals.sharding.targetingFindUnsharded.asTarget(), + signals.sharding.targetingInsertUnsharded.asTarget(), + signals.sharding.targetingUpdateUnsharded.asTarget(), + signals.sharding.targetingDeleteUnsharded.asTarget(), + signals.sharding.targetingAggregateUnsharded.asTarget(), + ]) + + g.panel.timeSeries.panelOptions.withDescription('The rate of CRUD operations and aggregation commands run on an unsharded collection. 
Specific to mongos nodes found under replica set "none".') + + g.panel.timeSeries.standardOptions.withUnit('ops') + + g.panel.timeSeries.options.legend.withPlacement('right') + + g.panel.timeSeries.options.tooltip.withMode('multi') + + g.panel.timeSeries.options.tooltip.withSort('desc'), + }, +} diff --git a/mongodb-atlas-mixin/prometheus_rules_out/prometheus_alerts.yaml b/mongodb-atlas-mixin/prometheus_rules_out/prometheus_alerts.yaml index c71ae4633..5594c4fe5 100644 --- a/mongodb-atlas-mixin/prometheus_rules_out/prometheus_alerts.yaml +++ b/mongodb-atlas-mixin/prometheus_rules_out/prometheus_alerts.yaml @@ -1,7 +1,7 @@ groups: - name: mongodb-atlas-alerts rules: - - alert: MongoDBAtlasHighNumberOfCollectionExclusiveDeadlocks + - alert: MongoDBAtlasCollExclusiveDeadlocks annotations: description: The number of collection exclusive-lock deadlocks occurring on node {{$labels.instance}} in cluster {{$labels.cl_name}} is {{printf "%.0f" $value}} which is above the threshold of 10. summary: There is a high number of collection exclusive deadlocks occurring. @@ -10,7 +10,7 @@ groups: for: 5m labels: severity: warning - - alert: MongoDBAtlasHighNumberOfCollectionIntentExclusiveDeadlocks + - alert: MongoDBAtlasCollIntentExclDeadlocks annotations: description: The number of collection intent-exclusive-lock deadlocks occurring on node {{$labels.instance}} in cluster {{$labels.cl_name}} is {{printf "%.0f" $value}} which is above the threshold of 10. summary: There is a high number of collection intent-exclusive deadlocks occurring. @@ -19,7 +19,7 @@ groups: for: 5m labels: severity: warning - - alert: MongoDBAtlasHighNumberOfCollectionSharedDeadlocks + - alert: MongoDBAtlasCollSharedDeadlocks annotations: description: The number of collection shared-lock deadlocks occurring on node {{$labels.instance}} in cluster {{$labels.cl_name}} is {{printf "%.0f" $value}} which is above the threshold of 10. summary: There is a high number of collection shared deadlocks occurring. 
@@ -28,7 +28,7 @@ groups: for: 5m labels: severity: warning - - alert: MongoDBAtlasHighNumberOfCollectionIntentSharedDeadlocks + - alert: MongoDBAtlasCollIntentSharedDeadlocks annotations: description: The number of collection intent-shared-lock deadlocks occurring on node {{$labels.instance}} in cluster {{$labels.cl_name}} is {{printf "%.0f" $value}} which is above the threshold of 10. summary: There is a high number of collection intent-shared deadlocks occurring. @@ -37,7 +37,7 @@ groups: for: 5m labels: severity: warning - - alert: MongoDBAtlasHighNumberOfDatabaseExclusiveDeadlocks + - alert: MongoDBAtlasDBExclusiveDeadlocks annotations: description: The number of database exclusive-lock deadlocks occurring on node {{$labels.instance}} in cluster {{$labels.cl_name}} is {{printf "%.0f" $value}} which is above the threshold of 10. summary: There is a high number of database exclusive deadlocks occurring. @@ -46,7 +46,7 @@ groups: for: 5m labels: severity: warning - - alert: MongoDBAtlasHighNumberOfDatabaseIntentExclusiveDeadlocks + - alert: MongoDBAtlasDBIntentExclDeadlocks annotations: description: The number of database intent-exclusive-lock deadlocks occurring on node {{$labels.instance}} in cluster {{$labels.cl_name}} is {{printf "%.0f" $value}} which is above the threshold of 10. summary: There is a high number of database intent-exclusive deadlocks occurring. @@ -55,7 +55,7 @@ groups: for: 5m labels: severity: warning - - alert: MongoDBAtlasHighNumberOfDatabaseSharedDeadlocks + - alert: MongoDBAtlasDBSharedDeadlocks annotations: description: The number of database shared-lock deadlocks occurring on node {{$labels.instance}} in cluster {{$labels.cl_name}} is {{printf "%.0f" $value}} which is above the threshold of 10. summary: There is a high number of database shared deadlocks occurring. 
@@ -64,7 +64,7 @@ groups: for: 5m labels: severity: warning - - alert: MongoDBAtlasHighNumberOfDatabaseIntentSharedDeadlocks + - alert: MongoDBAtlasDBIntentSharedDeadlocks annotations: description: The number of database intent-shared-lock deadlocks occurring on node {{$labels.instance}} in cluster {{$labels.cl_name}} is {{printf "%.0f" $value}} which is above the threshold of 10. summary: There is a high number of database intent-shared deadlocks occurring. @@ -73,7 +73,7 @@ groups: for: 5m labels: severity: warning - - alert: MongoDBAtlasHighNumberOfSlowNetworkRequests + - alert: MongoDBAtlasSlowNetworkRequests annotations: description: The number of DNS and SSL operations taking more than 1 second to complete on node {{$labels.instance}} in cluster {{$labels.cl_name}} is {{printf "%.0f" $value}} which is above the threshold of 10. summary: There is a high number of slow network requests. @@ -100,9 +100,9 @@ groups: for: 5m labels: severity: warning - - alert: MongoDBAtlasHighNumberOfTimeoutElections + - alert: MongoDBAtlasElectionTimeouts annotations: - description: The number of elections being called due to the primary node timing out in replica set {{$labels.rs_m}} in cluster {{$labels.cl_name}} is {{printf "%.0f" $value}} which is above the threshold of 10. + description: The number of elections being called due to the primary node timing out in replica set {{$labels.rs_nm}} in cluster {{$labels.cl_name}} is {{printf "%.0f" $value}} which is above the threshold of 10. summary: There is a high number of elections being called due to the primary node timing out. 
expr: | sum without (cl_role,process_port,instance,rs_state) (increase(mongodb_electionMetrics_electionTimeout_called[5m])) > 10
diff --git a/mongodb-atlas-mixin/rows.libsonnet b/mongodb-atlas-mixin/rows.libsonnet
new file mode 100644
index 000000000..901a4cf5b
--- /dev/null
+++ b/mongodb-atlas-mixin/rows.libsonnet
@@ -0,0 +1,251 @@
+local g = import './g.libsonnet';
+
+// Width overrides, in Grafana grid units, to be mixed into a panel.
+local w24 = { gridPos+: { w: 24 } };
+local w12 = { gridPos+: { w: 12 } };
+local w8 = { gridPos+: { w: 8 } };
+local w6 = { gridPos+: { w: 6 } };
+
+// Builds an expanded (non-collapsed) row with the given title and panels.
+local expandedRow(title, rowPanels) =
+  g.panel.row.new(title)
+  + g.panel.row.withCollapsed(false)
+  + g.panel.row.withPanels(rowPanels);
+
+{
+  new(this): {
+    local panels = this.grafana.panels,
+
+    //
+    // Cluster Overview dashboard rows
+    //
+
+    clusterOverviewShardRow: expandedRow('Shard', [
+      panels.shardNodesTable + w24,
+    ]),
+
+    clusterOverviewConfigRow: expandedRow('Config', [
+      panels.configNodesTable + w24,
+    ]),
+
+    clusterOverviewMongosRow: expandedRow('mongos', [
+      panels.mongosNodesTable + w24,
+    ]),
+
+    clusterOverviewPerformanceRow: expandedRow('Performance', [
+      panels.hardwareIO + w12,
+      panels.hardwareIOWaitTime + w12,
+      panels.hardwareCPUInterruptServiceTime + w12,
+      panels.memoryUsed + w12,
+      panels.diskSpaceUsage + w24,
+      panels.networkRequests + w12,
+      panels.networkThroughput + w12,
+      panels.slowRequests + w24,
+    ]),
+
+    clusterOverviewOperationsRow: expandedRow('Operations', [
+      panels.connections + w24,  // TODO position this in the correct place
+      panels.readwriteOperations + w12,
+      panels.operations + w12,
+      panels.readwriteLatency + w24,
+    ]),
+
+    clusterOverviewLocksRow: expandedRow('Locks', [
+      panels.currentQueue + w12,
+      panels.activeClientOperations + w12,
+      panels.databaseDeadlocks + w12,
+      panels.databaseWaitsAcquiringLock + w12,
+    ]),
+
+    //
+    // Elections Overview dashboard rows
+    //
+
+    electionsRow: expandedRow('Elections', [
+      panels.stepUpElectionsCalled + w12,
+      panels.priorityElections + w12,
+      panels.takeoverElections + w12,
+      panels.timeoutElections + w12,
+    ]),
+
+    electionsCatchUpsRow: expandedRow('Catch-ups', [
+      panels.catchUpsTotal + w12,
+      panels.catchUpsSkipped + w12,
+      panels.catchUpsSucceeded + w12,
+      panels.catchUpsFailed + w12,
+      panels.catchUpsTimedOut + w12,
+      panels.averageCatchUpOps + w12,
+    ]),
+
+    //
+    // Operations overview dashboard rows
+    //
+
+    operationsCountersClusterRow: expandedRow('Operation counters - cluster', [
+      panels.insertOperations + w12,
+      panels.queryOperations + w12,
+      panels.updateOperations + w12,
+      panels.deleteOperations + w12,
+    ]),
+
+    operationsCountersInstanceRow: expandedRow('Operation counters - instance', [
+      panels.insertOperationsByInstance + w12,
+      panels.queryOperationsByInstance + w12,
+      panels.updateOperationsByInstance + w12,
+      panels.deleteOperationsByInstance + w12,
+    ]),
+
+    operationsConnectionsRow: expandedRow('Connections', [
+      panels.currentConnectionsOperations + w12,
+      panels.activeConnectionsOperations + w12,
+    ]),
+
+    operationsReadWriteRow: expandedRow('Read/Write operations', [
+      panels.readwriteOperationsOperations + w12,
+      panels.readwriteLatencyOperations + w12,
+    ]),
+
+    operationsLocksRow: expandedRow('Locks', [
+      panels.databaseDeadlocksOperations + w8,
+      panels.databaseWaitCountOperations + w8,
+      panels.databaseWaitTimeOperations + w8,
+      panels.collectionDeadlocksOperations + w8,
+      panels.collectionWaitCountOperations + w8,
+      panels.collectionWaitTimeOperations + w8,
+    ]),
+
+    operationsLatenciesClusterRow: expandedRow('Operation latencies - cluster', [
+      panels.readOperationCount + w12,
+      panels.writeOperationCount + w12,
+      panels.readOperationLatency + w12,
+      panels.writeOperationLatency + w12,
+    ]),
+
+    operationsLatenciesInstanceRow: expandedRow('Operation latencies - instance', [
+      panels.readOperationCountByInstance + w12,
+      panels.writeOperationCountByInstance + w12,
+      panels.readOperationLatencyByInstance + w12,
+      panels.writeOperationLatencyByInstance + w12,
+    ]),
+
+    operationsAvgLatenciesRow: expandedRow('Average latencies', [
+      panels.avgReadLatency + w12,
+      panels.avgWriteLatency + w12,
+      panels.avgReadLatencyByInstance + w12,
+      panels.avgWriteLatencyByInstance + w12,
+    ]),
+
+    //
+    // Performance overview dashboard rows
+    //
+
+    performanceMemoryHardwareRow: expandedRow('Memory and hardware', [
+      panels.memoryPerformance + w12,
+      panels.hardwareCPUInterruptServiceTimePerformance + w12,
+    ]),
+
+    performanceDiskRow: expandedRow('Disk', [
+      panels.diskSpacePerformance + w12,
+      panels.diskSpaceUtilizationPerformance + w12,
+    ]),
+
+    performanceNetworkRow: expandedRow('Network', [
+      panels.networkRequestsPerformance + w12,
+      panels.slowNetworkRequestsPerformance + w12,
+      panels.networkThroughputPerformance + w24,
+    ]),
+
+    performanceHardwareIORow: expandedRow('Hardware I/O', [
+      panels.hardwareIOPerformance + w24,
+      panels.hardwareIOWaitTimePerformance + w24,
+    ]),
+
+    performanceConnectionsRow: expandedRow('Connections', [
+      panels.currentConnections + w12,
+      panels.activeConnections + w12,
+    ]),
+
+    performanceDbLocksClusterRow: expandedRow('Database lock deadlocks - cluster', [
+      panels.dbLockDeadlocksExclusive + w12,
+      panels.dbLockDeadlocksIntentExclusive + w12,
+      panels.dbLockDeadlocksShared + w12,
+      panels.dbLockDeadlocksIntentShared + w12,
+    ]),
+
+    performanceDbLocksInstanceRow: expandedRow('Database lock deadlocks - instance', [
+      panels.dbLockDeadlocksExclusiveByInstance + w12,
+      panels.dbLockDeadlocksIntentExclusiveByInstance + w12,
+      panels.dbLockDeadlocksSharedByInstance + w12,
+      panels.dbLockDeadlocksIntentSharedByInstance + w12,
+    ]),
+
+    performanceDbWaitCountsClusterRow: expandedRow('Database lock wait counts - cluster', [
+      panels.dbLockWaitCountExclusive + w12,
+      panels.dbLockWaitCountIntentExclusive + w12,
+      panels.dbLockWaitCountShared + w12,
+      panels.dbLockWaitCountIntentShared + w12,
+    ]),
+
+    performanceDbWaitCountsInstanceRow: expandedRow('Database lock wait counts - instance', [
+      panels.dbLockWaitCountExclusiveByInstance + w12,
+      panels.dbLockWaitCountIntentExclusiveByInstance + w12,
+      panels.dbLockWaitCountSharedByInstance + w12,
+      panels.dbLockWaitCountIntentSharedByInstance + w12,
+    ]),
+
+    performanceDbAcqTimeRow: expandedRow('Database lock acquisition time', [
+      panels.dbLockAcqTimeExclusive + w12,
+      panels.dbLockAcqTimeIntentExclusive + w12,
+      panels.dbLockAcqTimeShared + w12,
+      panels.dbLockAcqTimeIntentShared + w12,
+    ]),
+
+    performanceCollLocksRow: expandedRow('Collection lock deadlocks', [
+      panels.collLockDeadlocksExclusive + w12,
+      panels.collLockDeadlocksIntentExclusive + w12,
+      panels.collLockDeadlocksShared + w12,
+      panels.collLockDeadlocksIntentShared + w12,
+    ]),
+
+    performanceCollWaitCountsRow: expandedRow('Collection lock wait counts', [
+      panels.collLockWaitCountExclusive + w12,
+      panels.collLockWaitCountIntentExclusive + w12,
+      panels.collLockWaitCountShared + w12,
+      panels.collLockWaitCountIntentShared + w12,
+    ]),
+
+    performanceCollAcqTimeRow: expandedRow('Collection lock acquisition time', [
+      panels.collLockAcqTimeExclusive + w12,
+      panels.collLockAcqTimeIntentExclusive + w12,
+      panels.collLockAcqTimeShared + w12,
+      panels.collLockAcqTimeIntentShared + w12,
+    ]),
+
+    //
+    // Sharding overview dashboard rows
+    //
+
+    shardingGeneralStatsRow: expandedRow('General sharding statistics', [
+      panels.staleConfigErrors + w12,
+      panels.chunkMigrations + w12,
+      panels.docsCloned + w12,
+      panels.criticalSectionTime + w12,
+    ]),
+
+    shardingCatalogCacheRow: expandedRow('Catalog cache', [
+      panels.catalogCacheRefreshesStarted + w12,
+      panels.catalogCacheRefreshesFailed + w12,
+      panels.catalogCacheStaleConfigs + w6,
+      panels.catalogCacheEntries + w6,
+      panels.catalogCacheRefreshTime + w6,
+      panels.catalogCacheOperationsBlocked + w6,
+    ]),
+
+    shardingOperationsRow: expandedRow('Shard operations', [
+      panels.shardTargetingAllShards + w12,
+      panels.shardTargetingManyShards + w12,
+      panels.shardTargetingOneShard + w12,
+      panels.shardTargetingUnsharded + w12,
+    ]),
+  },
+}
diff --git a/mongodb-atlas-mixin/signals/cluster.libsonnet b/mongodb-atlas-mixin/signals/cluster.libsonnet
new file mode 100644
index 000000000..e8fe04716
--- /dev/null
+++ b/mongodb-atlas-mixin/signals/cluster.libsonnet
@@ -0,0 +1,503 @@
+function(this)
+  {
+    filteringSelector: this.filteringSelector,
+    groupLabels: this.groupLabels,
+    instanceLabels: this.instanceLabels,
+    enableLokiLogs: this.enableLokiLogs,
+    aggLevel: 'none',
+    aggFunction: 'sum',
+    signals: {
+
+      // Inventory signals (for shard, config, mongos nodes these are just representative metrics to best populate the tables)
+      shardNodeRepresentativeMetric: {
+        name: 'Shard node representative metric',
+        type: 'gauge',
+        aggLevel: 'none',
+        description: 'A representative metric for the shard node inventory.',
+        unit: 'none',
+        sources: {
+          prometheus: {
+            expr: 'mongodb_network_bytesIn{%(queriesSelector)s, rs_nm=~"$rs_nm"}',  // representative metric for a table
+            legendCustomTemplate: '',
+          },
+        },
+      },
+
+      configNodeRepresentativeMetric: {
+        name: 'Config inventory representative metric',
+        type: 'gauge',
+        aggLevel: 'none',
+        description: 'A representative metric for the config node inventory.',
+        unit: 'none',
+        sources: {
+          prometheus: {
+            expr: 'mongodb_network_bytesIn{%(queriesSelector)s, rs_nm=~"$rs_nm"}',  // representative metric for a table
+            legendCustomTemplate: '',
+          },
+        },
+      },
+
+      mongosNodeRepresentativeMetric: {
+        name: 'Mongos node representative metric',
+        type: 'gauge',
+        aggLevel: 'none',
+        description: 'A representative metric for the mongos node inventory.',
+        unit: 'none',
+        sources: {
+          prometheus: {
+            expr: 'mongodb_network_bytesIn{%(queriesSelector)s, rs_nm=~"$rs_nm"}',  // representative metric for a table
+            legendCustomTemplate: '',
+          },
+        },
+      },
+
+      // Hardware signals
+      diskReadCount: {
+        name: 'Disk read operations',
+        type: 'raw',
+        description: 'Number of disk read
operations.', + unit: 'ops', + sources: { + prometheus: { + expr: 'sum(rate(hardware_disk_metrics_read_count{%(queriesSelector)s,cl_name=~"$cl_name"}[$__rate_interval])) by (cl_name)', + legendCustomTemplate: '{{cl_name}} - reads', + }, + }, + }, + + diskWriteCount: { + name: 'Disk write operations', + type: 'raw', + description: 'Number of disk write operations.', + unit: 'ops', + sources: { + prometheus: { + expr: 'sum(rate(hardware_disk_metrics_write_count{%(queriesSelector)s,cl_name=~"$cl_name"}[$__rate_interval])) by (cl_name)', + legendCustomTemplate: '{{cl_name}} - writes', + }, + }, + }, + + diskReadTime: { + name: 'Disk read I/O time', + type: 'raw', + description: 'Time spent on read I/O operations.', + unit: 'ms', + sources: { + prometheus: { + expr: 'sum(increase(hardware_disk_metrics_read_time_milliseconds{%(queriesSelector)s,cl_name=~"$cl_name"}[$__interval:] offset -$__interval)) by (cl_name)', + legendCustomTemplate: '{{cl_name}} - reads', + }, + }, + }, + + diskWriteTime: { + name: 'Disk write I/O time', + type: 'raw', + description: 'Time spent on write I/O operations.', + unit: 'ms', + sources: { + prometheus: { + expr: 'sum(increase(hardware_disk_metrics_write_time_milliseconds{%(queriesSelector)s,cl_name=~"$cl_name"}[$__interval:] offset -$__interval)) by (cl_name)', + legendCustomTemplate: '{{cl_name}} - writes', + }, + }, + }, + + cpuIrqTime: { + name: 'CPU interrupt service time', + type: 'raw', + description: 'CPU time spent servicing interrupts.', + unit: 'ms', + sources: { + prometheus: { + expr: 'sum(increase(hardware_system_cpu_irq_milliseconds{%(queriesSelector)s,cl_name=~"$cl_name"}[$__interval:] offset -$__interval)) by (cl_name)', + legendCustomTemplate: '{{cl_name}}', + }, + }, + }, + + diskSpaceUtilization: { + name: 'Disk space utilization', + type: 'raw', + description: 'Percentage of disk space used.', + unit: 'percentunit', + sources: { + prometheus: { + expr: '(sum without (disk_name) 
(hardware_disk_metrics_disk_space_used_bytes{%(queriesSelector)s})) / clamp_min((sum without (disk_name) (hardware_disk_metrics_disk_space_used_bytes{%(queriesSelector)s})) + (sum without (disk_name) (hardware_disk_metrics_disk_space_free_bytes{%(queriesSelector)s})), 1)', + legendCustomTemplate: '{{cl_name}}', + }, + }, + }, + + // Memory signals + memoryResident: { + name: 'Memory resident (RAM)', + type: 'raw', + description: 'Resident memory (RAM) usage.', + unit: 'mbytes', + sources: { + prometheus: { + expr: 'sum(mongodb_mem_resident{%(queriesSelector)s, rs_nm=~"$rs_nm"}) by (cl_name)', + legendCustomTemplate: '{{cl_name}} - RAM', + }, + }, + }, + + memoryVirtual: { + name: 'Memory virtual', + type: 'raw', + description: 'Virtual memory usage.', + unit: 'mbytes', + sources: { + prometheus: { + expr: 'sum(mongodb_mem_virtual{%(queriesSelector)s, rs_nm=~"$rs_nm"}) by (cl_name)', + legendCustomTemplate: '{{cl_name}} - virtual', + }, + }, + }, + + // Network signals + networkRequests: { + name: 'Network requests', + type: 'raw', + description: 'Number of network requests received.', + unit: 'reqps', + sources: { + prometheus: { + expr: 'sum(rate(mongodb_network_numRequests{%(queriesSelector)s, rs_nm=~"$rs_nm"}[$__rate_interval])) by (cl_name)', + legendCustomTemplate: '{{cl_name}}', + }, + }, + }, + + networkBytesIn: { + name: 'Network bytes received', + type: 'raw', + description: 'Network bytes received.', + unit: 'Bps', + sources: { + prometheus: { + expr: 'sum(rate(mongodb_network_bytesIn{%(queriesSelector)s, rs_nm=~"$rs_nm"}[$__rate_interval])) by (cl_name)', + legendCustomTemplate: '{{cl_name}} - received', + }, + }, + }, + + networkBytesOut: { + name: 'Network bytes sent', + type: 'raw', + description: 'Network bytes sent.', + unit: 'Bps', + sources: { + prometheus: { + expr: 'sum(rate(mongodb_network_bytesOut{%(queriesSelector)s, rs_nm=~"$rs_nm"}[$__rate_interval])) by (cl_name)', + legendCustomTemplate: '{{cl_name}} - sent', + }, + }, + }, + + 
networkSlowDNS: {
+        name: 'Slow DNS operations',
+        type: 'raw',
+        description: 'Number of slow DNS operations (>1s).',
+        unit: 'ops',
+        sources: {
+          prometheus: {
+            expr: 'sum(rate(mongodb_network_numSlowDNSOperations{%(queriesSelector)s, rs_nm=~"$rs_nm"}[$__rate_interval])) by (cl_name)',
+            legendCustomTemplate: '{{cl_name}} - DNS',
+          },
+        },
+      },
+
+      networkSlowSSL: {
+        name: 'Slow SSL operations',
+        type: 'raw',  // expr carries its own per-cluster aggregation, same shape as networkSlowDNS
+        description: 'Number of slow SSL operations (>1s).',
+        unit: 'ops',
+        sources: {
+          prometheus: {
+            expr: 'sum(rate(mongodb_network_numSlowSSLOperations{%(queriesSelector)s, rs_nm=~"$rs_nm"}[$__rate_interval])) by (cl_name)',
+            legendCustomTemplate: '{{cl_name}} - SSL',
+          },
+        },
+      },
+
+      // Connection signals
+      connectionsCreated: {
+        name: 'Connections created',
+        type: 'raw',
+        description: 'Total connections created.',
+        unit: 'conns',
+        sources: {
+          prometheus: {
+            expr: 'sum(rate(mongodb_connections_totalCreated{%(queriesSelector)s}[$__rate_interval])) by (cl_name)',
+            legendCustomTemplate: '{{cl_name}}',
+          },
+        },
+      },
+
+      // Operations signals
+      opLatenciesReadsOps: {
+        name: 'Read operation count',
+        type: 'raw',
+        description: 'Number of read operations.',
+        unit: 'ops',
+        sources: {
+          prometheus: {
+            expr: 'sum(rate(mongodb_opLatencies_reads_ops{%(queriesSelector)s, rs_nm=~"$rs_nm"}[$__rate_interval])) by (cl_name)',
+            legendCustomTemplate: '{{cl_name}} - reads',
+          },
+        },
+      },
+
+      opLatenciesWritesOps: {
+        name: 'Write operation count',
+        type: 'raw',
+        description: 'Number of write operations.',
+        unit: 'ops',
+        sources: {
+          prometheus: {
+            expr: 'sum(rate(mongodb_opLatencies_writes_ops{%(queriesSelector)s, rs_nm=~"$rs_nm"}[$__rate_interval])) by (cl_name)',
+            legendCustomTemplate: '{{cl_name}} - writes',
+          },
+        },
+      },
+
+      opCountersInsert: {
+        name: 'Insert operations',
+        type: 'raw',
+        description: 'Number of insert operations.',
+        unit: 'none',
+        sources: {
+          prometheus: {
+            expr: 'sum(increase(mongodb_opcounters_insert{%(queriesSelector)s, rs_nm=~"$rs_nm"}[$__interval:]
offset -$__interval)) by (cl_name)',
+            legendCustomTemplate: '{{cl_name}} - insert',
+          },
+        },
+      },
+
+      opCountersQuery: {
+        name: 'Query operations',
+        type: 'raw',
+        description: 'Number of query operations.',
+        unit: 'none',
+        sources: {
+          prometheus: {
+            expr: 'sum(increase(mongodb_opcounters_query{%(queriesSelector)s, rs_nm=~"$rs_nm"}[$__interval:] offset -$__interval)) by (cl_name)',
+            legendCustomTemplate: '{{cl_name}} - query',
+          },
+        },
+      },
+
+      opCountersUpdate: {
+        name: 'Update operations',
+        type: 'raw',
+        description: 'Number of update operations.',
+        unit: 'none',
+        sources: {
+          prometheus: {
+            expr: 'sum(increase(mongodb_opcounters_update{%(queriesSelector)s, rs_nm=~"$rs_nm"}[$__interval:] offset -$__interval)) by (cl_name)',
+            legendCustomTemplate: '{{cl_name}} - update',
+          },
+        },
+      },
+
+      opCountersDelete: {
+        name: 'Delete operations',
+        type: 'raw',
+        description: 'Number of delete operations.',
+        unit: 'none',
+        sources: {
+          prometheus: {
+            expr: 'sum(increase(mongodb_opcounters_delete{%(queriesSelector)s, rs_nm=~"$rs_nm"}[$__interval:] offset -$__interval)) by (cl_name)',
+            legendCustomTemplate: '{{cl_name}} - delete',
+          },
+        },
+      },
+
+      opLatenciesReadsLatency: {
+        name: 'Read operation latency',
+        type: 'raw',
+        description: 'Total read operation latency.',
+        unit: 'µs',
+        sources: {
+          prometheus: {
+            expr: 'sum(increase(mongodb_opLatencies_reads_latency{%(queriesSelector)s, rs_nm=~"$rs_nm"}[$__interval:] offset -$__interval)) by (cl_name)',
+            legendCustomTemplate: '{{cl_name}} - reads',
+          },
+        },
+      },
+
+      opLatenciesWritesLatency: {
+        name: 'Write operation latency',
+        type: 'raw',
+        description: 'Total write operation latency.',
+        unit: 'µs',
+        sources: {
+          prometheus: {
+            // Summed per cluster with the same expression shape as opLatenciesReadsLatency.
+            expr: 'sum(increase(mongodb_opLatencies_writes_latency{%(queriesSelector)s, rs_nm=~"$rs_nm"}[$__interval:] offset -$__interval)) by (cl_name)',
+            legendCustomTemplate: '{{cl_name}} - writes',
+          },
+        },
+      },
+
+      // Locks signals
+      globalLockQueueReaders: {
+        name: 'Global lock queue - readers', +
type: 'raw', + description: 'Number of read operations queued due to locks.', + unit: 'none', + sources: { + prometheus: { + expr: 'sum(mongodb_globalLock_currentQueue_readers{%(queriesSelector)s, rs_nm=~"$rs_nm"}) by (cl_name)', + legendCustomTemplate: '{{cl_name}} - reads', + }, + }, + }, + + globalLockQueueWriters: { + name: 'Global lock queue - writers', + type: 'raw', + description: 'Number of write operations queued due to locks.', + unit: 'none', + sources: { + prometheus: { + expr: 'sum(mongodb_globalLock_currentQueue_writers{%(queriesSelector)s, rs_nm=~"$rs_nm"}) by (cl_name)', + legendCustomTemplate: '{{cl_name}} - writes', + }, + }, + }, + + globalLockActiveReaders: { + name: 'Global lock active clients - readers', + type: 'raw', + description: 'Number of active read operations.', + unit: 'none', + sources: { + prometheus: { + expr: 'sum(mongodb_globalLock_activeClients_readers{%(queriesSelector)s}) by (cl_name)', + legendCustomTemplate: '{{cl_name}} - reads', + }, + }, + }, + + globalLockActiveWriters: { + name: 'Global lock active clients - writers', + type: 'raw', + description: 'Number of active write operations.', + unit: 'none', + sources: { + prometheus: { + expr: 'sum(mongodb_globalLock_activeClients_writers{%(queriesSelector)s}) by (cl_name)', + legendCustomTemplate: '{{cl_name}} - writes', + }, + }, + }, + + dbDeadlockExclusive: { + name: 'Database exclusive lock deadlocks', + type: 'raw', + description: 'Database exclusive lock deadlocks.', + unit: 'none', + sources: { + prometheus: { + expr: 'sum(increase(mongodb_locks_Database_deadlockCount_W{%(queriesSelector)s}[$__interval:] offset -$__interval)) by (cl_name)', + legendCustomTemplate: '{{cl_name}} - exclusive', + }, + }, + }, + + dbDeadlockIntentExclusive: { + name: 'Database intent exclusive lock deadlocks', + type: 'raw', + description: 'Database intent exclusive lock deadlocks.', + unit: 'none', + sources: { + prometheus: { + expr: 
'sum(increase(mongodb_locks_Database_deadlockCount_w{%(queriesSelector)s}[$__interval:] offset -$__interval)) by (cl_name)', + legendCustomTemplate: '{{cl_name}} - intent exclusive', + }, + }, + }, + + dbDeadlockShared: { + name: 'Database shared lock deadlocks', + type: 'raw', + description: 'Database shared lock deadlocks.', + unit: 'none', + sources: { + prometheus: { + expr: 'sum(increase(mongodb_locks_Database_deadlockCount_R{%(queriesSelector)s}[$__interval:] offset -$__interval)) by (cl_name)', + legendCustomTemplate: '{{cl_name}} - shared', + }, + }, + }, + + dbDeadlockIntentShared: { + name: 'Database intent shared lock deadlocks', + type: 'raw', + description: 'Database intent shared lock deadlocks.', + unit: 'none', + sources: { + prometheus: { + expr: 'sum(increase(mongodb_locks_Database_deadlockCount_r{%(queriesSelector)s}[$__interval:] offset -$__interval)) by (cl_name)', + legendCustomTemplate: '{{cl_name}} - intent shared', + }, + }, + }, + + dbWaitCountExclusive: { + name: 'Database exclusive lock wait count', + type: 'raw', + description: 'Database exclusive lock wait count.', + unit: 'none', + sources: { + prometheus: { + expr: 'sum(increase(mongodb_locks_Database_acquireWaitCount_W{%(queriesSelector)s}[$__interval:] offset -$__interval)) by (cl_name)', + legendCustomTemplate: '{{cl_name}} - exclusive', + }, + }, + }, + + dbWaitCountIntentExclusive: { + name: 'Database intent exclusive lock wait count', + type: 'raw', + description: 'Database intent exclusive lock wait count.', + unit: 'none', + sources: { + prometheus: { + expr: 'sum(increase(mongodb_locks_Database_acquireWaitCount_w{%(queriesSelector)s}[$__interval:] offset -$__interval)) by (cl_name)', + legendCustomTemplate: '{{cl_name}} - intent exclusive', + }, + }, + }, + + dbWaitCountShared: { + name: 'Database shared lock wait count', + type: 'raw', + description: 'Database shared lock wait count.', + unit: 'none', + sources: { + prometheus: { + expr: 
'sum(increase(mongodb_locks_Database_acquireWaitCount_R{%(queriesSelector)s}[$__interval:] offset -$__interval)) by (cl_name)', + legendCustomTemplate: '{{cl_name}} - shared', + }, + }, + }, + + dbWaitCountIntentShared: { + name: 'Database intent shared lock wait count', + type: 'raw', + description: 'Database intent shared lock wait count.', + unit: 'none', + sources: { + prometheus: { + expr: 'sum(increase(mongodb_locks_Database_acquireWaitCount_r{%(queriesSelector)s}[$__interval:] offset -$__interval)) by (cl_name)', + legendCustomTemplate: '{{cl_name}} - intent shared', + }, + }, + }, + }, + } diff --git a/mongodb-atlas-mixin/signals/elections.libsonnet b/mongodb-atlas-mixin/signals/elections.libsonnet new file mode 100644 index 000000000..6d7a72367 --- /dev/null +++ b/mongodb-atlas-mixin/signals/elections.libsonnet @@ -0,0 +1,207 @@ +function(this) + { + filteringSelector: this.filteringSelector, + groupLabels: this.groupLabels, + instanceLabels: this.instanceLabels, + enableLokiLogs: this.enableLokiLogs, + aggLevel: 'none', + aggFunction: 'sum', + signals: { + + stepUpCmdCalled: { + name: 'Step-up elections called', + type: 'counter', + description: 'Number of step-up elections called.', + unit: 'none', + sources: { + prometheus: { + expr: 'mongodb_electionMetrics_stepUpCmd_called{%(queriesSelector)s, rs_nm=~"$rs_nm"}', + rangeFunction: 'increase', + legendCustomTemplate: '{{instance}} - called', + }, + }, + }, + + stepUpCmdSuccessful: { + name: 'Step-up elections successful', + type: 'counter', + description: 'Number of successful step-up elections.', + unit: 'none', + sources: { + prometheus: { + expr: 'mongodb_electionMetrics_stepUpCmd_successful{%(queriesSelector)s, rs_nm=~"$rs_nm"}', + rangeFunction: 'increase', + legendCustomTemplate: '{{instance}} - successful', + }, + }, + }, + + priorityTakeoverCalled: { + name: 'Priority takeover elections called', + type: 'counter', + description: 'Number of priority takeover elections called.', + unit: 'none', + 
sources: { + prometheus: { + expr: 'mongodb_electionMetrics_priorityTakeover_called{%(queriesSelector)s, rs_nm=~"$rs_nm"}', + rangeFunction: 'increase', + legendCustomTemplate: '{{instance}} - called', + }, + }, + }, + + priorityTakeoverSuccessful: { + name: 'Priority takeover elections successful', + type: 'counter', + description: 'Number of successful priority takeover elections.', + unit: 'none', + sources: { + prometheus: { + expr: 'mongodb_electionMetrics_priorityTakeover_successful{%(queriesSelector)s, rs_nm=~"$rs_nm"}', + rangeFunction: 'increase', + legendCustomTemplate: '{{instance}} - successful', + }, + }, + }, + + catchUpTakeoverCalled: { + name: 'Catch-up takeover elections called', + type: 'counter', + description: 'Number of catch-up takeover elections called.', + unit: 'none', + sources: { + prometheus: { + expr: 'mongodb_electionMetrics_catchUpTakeover_called{%(queriesSelector)s, rs_nm=~"$rs_nm"}', + rangeFunction: 'increase', + legendCustomTemplate: '{{instance}} - called', + }, + }, + }, + + catchUpTakeoverSuccessful: { + name: 'Catch-up takeover elections successful', + type: 'counter', + description: 'Number of successful catch-up takeover elections.', + unit: 'none', + sources: { + prometheus: { + expr: 'mongodb_electionMetrics_catchUpTakeover_successful{%(queriesSelector)s, rs_nm=~"$rs_nm"}', + rangeFunction: 'increase', + legendCustomTemplate: '{{instance}} - successful', + }, + }, + }, + + electionTimeoutCalled: { + name: 'Election timeout elections called', + type: 'counter', + description: 'Number of election timeout elections called.', + unit: 'none', + sources: { + prometheus: { + expr: 'mongodb_electionMetrics_electionTimeout_called{%(queriesSelector)s, rs_nm=~"$rs_nm"}', + rangeFunction: 'increase', + legendCustomTemplate: '{{instance}} - called', + }, + }, + }, + + electionTimeoutSuccessful: { + name: 'Election timeout elections successful', + type: 'counter', + description: 'Number of successful election timeout elections.', + 
unit: 'none', + sources: { + prometheus: { + expr: 'mongodb_electionMetrics_electionTimeout_successful{%(queriesSelector)s, rs_nm=~"$rs_nm"}', + rangeFunction: 'increase', + legendCustomTemplate: '{{instance}} - successful', + }, + }, + }, + + numCatchUps: { + name: 'Number of catch-ups', + type: 'counter', + description: 'Number of catch-up operations.', + unit: 'none', + sources: { + prometheus: { + expr: 'mongodb_electionMetrics_numCatchUps{%(queriesSelector)s, rs_nm=~"$rs_nm"}', + rangeFunction: 'increase', + legendCustomTemplate: '{{instance}}', + }, + }, + }, + + numCatchUpsSkipped: { + name: 'Number of catch-ups skipped', + type: 'counter', + description: 'Number of catch-ups skipped.', + unit: 'none', + sources: { + prometheus: { + expr: 'mongodb_electionMetrics_numCatchUpsSkipped{%(queriesSelector)s, rs_nm=~"$rs_nm"}', + rangeFunction: 'increase', + legendCustomTemplate: '{{instance}}', + }, + }, + }, + + numCatchUpsSucceeded: { + name: 'Number of catch-ups succeeded', + type: 'counter', + description: 'Number of catch-ups succeeded.', + unit: 'none', + sources: { + prometheus: { + expr: 'mongodb_electionMetrics_numCatchUpsSucceeded{%(queriesSelector)s, rs_nm=~"$rs_nm"}', + rangeFunction: 'increase', + legendCustomTemplate: '{{instance}}', + }, + }, + }, + + numCatchUpsFailedWithError: { + name: 'Number of catch-ups failed with error', + type: 'counter', + description: 'Number of catch-ups failed with error.', + unit: 'none', + sources: { + prometheus: { + expr: 'mongodb_electionMetrics_numCatchUpsFailedWithError{%(queriesSelector)s, rs_nm=~"$rs_nm"}', + rangeFunction: 'increase', + legendCustomTemplate: '{{instance}}', + }, + }, + }, + + numCatchUpsTimedOut: { + name: 'Number of catch-up timeouts', + type: 'counter', + description: 'Number of catch-up timeouts.', + unit: 'none', + sources: { + prometheus: { + expr: 'mongodb_electionMetrics_numCatchUpsTimedOut{%(queriesSelector)s, rs_nm=~"$rs_nm"}', + rangeFunction: 'increase', + legendCustomTemplate: 
'{{instance}}', + }, + }, + }, + + averageCatchUpOps: { + name: 'Average catch-up operations', + type: 'gauge', + aggLevel: 'none', + description: 'Average catch-up operations.', + unit: 'none', + sources: { + prometheus: { + expr: 'mongodb_electionMetrics_averageCatchUpOps{%(queriesSelector)s, rs_nm=~"$rs_nm"}', + legendCustomTemplate: '{{instance}}', + }, + }, + }, + }, + } diff --git a/mongodb-atlas-mixin/signals/operations.libsonnet b/mongodb-atlas-mixin/signals/operations.libsonnet new file mode 100644 index 000000000..ac0c2f3f8 --- /dev/null +++ b/mongodb-atlas-mixin/signals/operations.libsonnet @@ -0,0 +1,733 @@ +function(this) + { + filteringSelector: this.filteringSelector, + groupLabels: this.groupLabels, + instanceLabels: this.instanceLabels, + enableLokiLogs: this.enableLokiLogs, + aggLevel: 'group', + aggFunction: 'sum', + signals: { + + // Operation counters (cluster-level) + opCountersInsert: { + name: 'Insert operations', + type: 'counter', + description: 'Number of insert operations.', + unit: 'none', + sources: { + prometheus: { + expr: 'mongodb_opcounters_insert{%(queriesSelector)s, rs_nm=~"$rs_nm"}', + rangeFunction: 'increase', + legendCustomTemplate: '{{cl_name}} - insert', + }, + }, + }, + + opCountersQuery: { + name: 'Query operations', + type: 'counter', + description: 'Number of query operations.', + unit: 'none', + sources: { + prometheus: { + expr: 'mongodb_opcounters_query{%(queriesSelector)s, rs_nm=~"$rs_nm"}', + rangeFunction: 'increase', + legendCustomTemplate: '{{cl_name}} - query', + }, + }, + }, + + opCountersDelete: { + name: 'Delete operations', + type: 'counter', + description: 'Number of delete operations.', + unit: 'none', + sources: { + prometheus: { + expr: 'mongodb_opcounters_delete{%(queriesSelector)s, rs_nm=~"$rs_nm"}', + rangeFunction: 'increase', + legendCustomTemplate: '{{cl_name}} - delete', + }, + }, + }, + + // Operation counters (by instance) + opCountersInsertByInstance: { + name: 'Insert operations by instance', 
+ type: 'counter', + aggLevel: 'none', + description: 'Number of insert operations per instance.', + unit: 'ops', + sources: { + prometheus: { + expr: 'mongodb_opcounters_insert{%(queriesSelector)s, rs_nm=~"$rs_nm"}', + legendCustomTemplate: '{{instance}}', + }, + }, + }, + + opCountersQueryByInstance: { + name: 'Query operations by instance', + type: 'counter', + aggLevel: 'none', + description: 'Number of query operations per instance.', + unit: 'ops', + sources: { + prometheus: { + expr: 'mongodb_opcounters_query{%(queriesSelector)s, rs_nm=~"$rs_nm"}', + legendCustomTemplate: '{{instance}}', + }, + }, + }, + + opCountersUpdate: {  // per-instance signal (aggLevel 'none'); key left unchanged so references elsewhere keep resolving + name: 'Update operations by instance', + type: 'counter', + aggLevel: 'none', + description: 'Number of update operations per instance.', + unit: 'ops', + sources: { + prometheus: { + expr: 'mongodb_opcounters_update{%(queriesSelector)s, rs_nm=~"$rs_nm"}', + legendCustomTemplate: '{{instance}}', + }, + }, + }, + + opCountersDeleteByInstance: { + name: 'Delete operations by instance', + type: 'counter', + aggLevel: 'none', + description: 'Number of delete operations per instance.', + unit: 'ops', + sources: { + prometheus: { + expr: 'mongodb_opcounters_delete{%(queriesSelector)s, rs_nm=~"$rs_nm"}', + legendCustomTemplate: '{{instance}}', + }, + }, + }, + + // Operation latencies (cluster-level) + opLatenciesReadsOps: { + name: 'Read operation count', + type: 'counter', + description: 'Number of read operations.', + unit: 'ops', + sources: { + prometheus: { + expr: 'mongodb_opLatencies_reads_ops{%(queriesSelector)s, rs_nm=~"$rs_nm"}', + legendCustomTemplate: '{{cl_name}} - reads', + }, + }, + }, + + opLatenciesWritesOps: { + name: 'Write operation count', + type: 'counter', + description: 'Number of write operations.', + unit: 'ops', + sources: { + prometheus: { + expr: 'mongodb_opLatencies_writes_ops{%(queriesSelector)s, rs_nm=~"$rs_nm"}', + legendCustomTemplate: '{{cl_name}} - writes', + }, + }, + }, + + opLatenciesReadsLatency: { + name: 'Read operation
latency', + type: 'counter', + description: 'Total read operation latency.', + unit: 'µs', + sources: { + prometheus: { + expr: 'mongodb_opLatencies_reads_latency{%(queriesSelector)s, rs_nm=~"$rs_nm"}', + rangeFunction: 'increase', + legendCustomTemplate: '{{cl_name}} - reads', + }, + }, + }, + + opLatenciesWritesLatency: { + name: 'Write operation latency', + type: 'counter', + description: 'Total write operation latency.', + unit: 'µs', + sources: { + prometheus: { + expr: 'mongodb_opLatencies_writes_latency{%(queriesSelector)s, rs_nm=~"$rs_nm"}', + rangeFunction: 'increase', + legendCustomTemplate: '{{cl_name}} - writes', + }, + }, + }, + + // Operation latencies (by instance) + opLatenciesReadsOpsByInstance: { + name: 'Read operation count by instance', + type: 'counter', + aggLevel: 'none', + description: 'Number of read operations per instance.', + unit: 'ops', + sources: { + prometheus: { + expr: 'mongodb_opLatencies_reads_ops{%(queriesSelector)s, rs_nm=~"$rs_nm"}', + legendCustomTemplate: '{{instance}} - reads', + }, + }, + }, + + opLatenciesWritesOpsByInstance: { + name: 'Write operation count by instance', + type: 'counter', + aggLevel: 'none', + description: 'Number of write operations per instance.', + unit: 'ops', + sources: { + prometheus: { + expr: 'mongodb_opLatencies_writes_ops{%(queriesSelector)s, rs_nm=~"$rs_nm"}', + legendCustomTemplate: '{{instance}} - writes', + }, + }, + }, + + opLatenciesReadsLatencyByInstance: { + name: 'Read operation latency by instance', + type: 'counter', + aggLevel: 'none', + description: 'Total read operation latency per instance.', + unit: 'µs', + sources: { + prometheus: { + expr: 'mongodb_opLatencies_reads_latency{%(queriesSelector)s, rs_nm=~"$rs_nm"}', + rangeFunction: 'increase', + legendCustomTemplate: '{{instance}} - reads', + }, + }, + }, + + opLatenciesWritesLatencyByInstance: { + name: 'Write operation latency by instance', + type: 'counter', + aggLevel: 'none', + description: 'Total write operation latency 
per instance.', + unit: 'µs', + sources: { + prometheus: { + expr: 'mongodb_opLatencies_writes_latency{%(queriesSelector)s, rs_nm=~"$rs_nm"}', + rangeFunction: 'increase', + legendCustomTemplate: '{{instance}} - writes', + }, + }, + }, + + // Average latency calculations: type 'raw' expressions that divide the increase of the accumulated latency counter by the increase of the matching operation counter over the same $__interval window; clamp_min(..., 1) keeps the divisor at 1 or more so a window without operations does not divide by zero. The cluster-level variants sum both sides by (job, cl_name) before dividing; the ByInstance variants divide the per-series increases directly. + avgReadLatency: { + name: 'Average read latency', + type: 'raw', + description: 'Average latency per read operation.', + unit: 'µs', + sources: { + prometheus: { + expr: 'sum (increase(mongodb_opLatencies_reads_latency{%(queriesSelector)s, rs_nm=~"$rs_nm"}[$__interval:])) by (job, cl_name) / clamp_min(sum (increase(mongodb_opLatencies_reads_ops{%(queriesSelector)s, rs_nm=~"$rs_nm"}[$__interval:])) by (job, cl_name), 1)', + legendCustomTemplate: '{{cl_name}} - reads', + }, + }, + }, + + avgWriteLatency: { + name: 'Average write latency', + type: 'raw', + description: 'Average latency per write operation.', + unit: 'µs', + sources: { + prometheus: { + expr: 'sum (increase(mongodb_opLatencies_writes_latency{%(queriesSelector)s, rs_nm=~"$rs_nm"}[$__interval:])) by (job, cl_name) / clamp_min(sum (increase(mongodb_opLatencies_writes_ops{%(queriesSelector)s, rs_nm=~"$rs_nm"}[$__interval:])) by (job, cl_name), 1)', + legendCustomTemplate: '{{cl_name}} - writes', + }, + }, + }, + + avgReadLatencyByInstance: { + name: 'Average read latency by instance', + type: 'raw', + description: 'Average latency per read operation by instance.', + unit: 'µs', + sources: { + prometheus: { + expr: 'increase(mongodb_opLatencies_reads_latency{%(queriesSelector)s, rs_nm=~"$rs_nm"}[$__interval:]) / clamp_min(increase(mongodb_opLatencies_reads_ops{%(queriesSelector)s, rs_nm=~"$rs_nm"}[$__interval:]), 1)', + legendCustomTemplate: '{{instance}} - reads', + }, + }, + }, + + avgWriteLatencyByInstance: { + name: 'Average write latency by instance', + type: 'raw', + description: 'Average latency per write operation by instance.', + unit: 'µs', + sources: { + prometheus: { + expr:
'increase(mongodb_opLatencies_writes_latency{%(queriesSelector)s, rs_nm=~"$rs_nm"}[$__interval:]) / clamp_min(increase(mongodb_opLatencies_writes_ops{%(queriesSelector)s, rs_nm=~"$rs_nm"}[$__interval:]), 1)', + legendCustomTemplate: '{{instance}} - writes', + }, + }, + }, + + // Connection signals + connectionsCurrent: { + name: 'Current connections', + type: 'gauge', + aggLevel: 'none', + description: 'Current number of incoming connections.', + unit: 'none', + sources: { + prometheus: { + expr: 'mongodb_connections_current{%(queriesSelector)s, rs_nm=~"$rs_nm"}', + legendCustomTemplate: '{{instance}}', + }, + }, + }, + + connectionsActive: { + name: 'Active connections', + type: 'gauge', + aggLevel: 'none', + description: 'Current number of connections with operations in progress.', + unit: 'none', + sources: { + prometheus: { + expr: 'mongodb_connections_active{%(queriesSelector)s, rs_nm=~"$rs_nm"}', + legendCustomTemplate: '{{instance}}', + }, + }, + }, + + // Read/write operations (by instance with stacking) + opLatenciesReadsOpsByInstanceStacked: { + name: 'Read operations by instance', + type: 'counter', + aggLevel: 'none', + description: 'Rate of read operations per instance.', + unit: 'ops', + sources: { + prometheus: { + expr: 'mongodb_opLatencies_reads_ops{%(queriesSelector)s, rs_nm=~"$rs_nm"}', + legendCustomTemplate: '{{instance}} - reads', + }, + }, + }, + + opLatenciesWritesOpsByInstanceStacked: { + name: 'Write operations by instance', + type: 'counter', + aggLevel: 'none', + description: 'Rate of write operations per instance.', + unit: 'ops', + sources: { + prometheus: { + expr: 'mongodb_opLatencies_writes_ops{%(queriesSelector)s, rs_nm=~"$rs_nm"}', + legendCustomTemplate: '{{instance}} - writes', + }, + }, + }, + + // Read/write latencies (by instance with stacking) + opLatenciesReadsLatencyByInstanceStacked: { + name: 'Read latency by instance', + type: 'counter', + aggLevel: 'none', + description: 'Read operation latency per instance.', + unit: 
'µs', + sources: { + prometheus: { + expr: 'mongodb_opLatencies_reads_latency{%(queriesSelector)s, rs_nm=~"$rs_nm"}', + rangeFunction: 'increase', + legendCustomTemplate: '{{instance}} - reads', + }, + }, + }, + + opLatenciesWritesLatencyByInstanceStacked: { + name: 'Write latency by instance', + type: 'counter', + aggLevel: 'none', + description: 'Write operation latency per instance.', + unit: 'µs', + sources: { + prometheus: { + expr: 'mongodb_opLatencies_writes_latency{%(queriesSelector)s, rs_nm=~"$rs_nm"}', + rangeFunction: 'increase', + legendCustomTemplate: '{{instance}} - writes', + }, + }, + }, + + // Database lock deadlocks (by instance) + dbDeadlockExclusiveByInstance: { + name: 'Database exclusive lock deadlocks by instance', + type: 'counter', + aggLevel: 'none', + description: 'Database exclusive lock deadlocks per instance.', + unit: 'none', + sources: { + prometheus: { + expr: 'mongodb_locks_Database_deadlockCount_W{%(queriesSelector)s, rs_nm=~"$rs_nm"}', + rangeFunction: 'increase', + legendCustomTemplate: '{{instance}} - exclusive', + }, + }, + }, + + dbDeadlockIntentExclusiveByInstance: { + name: 'Database intent exclusive lock deadlocks by instance', + type: 'counter', + aggLevel: 'none', + description: 'Database intent exclusive lock deadlocks per instance.', + unit: 'none', + sources: { + prometheus: { + expr: 'mongodb_locks_Database_deadlockCount_w{%(queriesSelector)s, rs_nm=~"$rs_nm"}', + rangeFunction: 'increase', + legendCustomTemplate: '{{instance}} - intent exclusive', + }, + }, + }, + + dbDeadlockSharedByInstance: { + name: 'Database shared lock deadlocks by instance', + type: 'counter', + aggLevel: 'none', + description: 'Database shared lock deadlocks per instance.', + unit: 'none', + sources: { + prometheus: { + expr: 'mongodb_locks_Database_deadlockCount_R{%(queriesSelector)s, rs_nm=~"$rs_nm"}', + rangeFunction: 'increase', + legendCustomTemplate: '{{instance}} - shared', + }, + }, + }, + + dbDeadlockIntentSharedByInstance: { + 
name: 'Database intent shared lock deadlocks by instance', + type: 'counter', + aggLevel: 'none', + description: 'Database intent shared lock deadlocks per instance.', + unit: 'none', + sources: { + prometheus: { + expr: 'mongodb_locks_Database_deadlockCount_r{%(queriesSelector)s, rs_nm=~"$rs_nm"}', + rangeFunction: 'increase', + legendCustomTemplate: '{{instance}} - intent shared', + }, + }, + }, + + // Database lock wait counts (by instance) + dbWaitCountExclusiveByInstance: { + name: 'Database exclusive lock wait count by instance', + type: 'counter', + aggLevel: 'none', + description: 'Database exclusive lock wait count per instance.', + unit: 'none', + sources: { + prometheus: { + expr: 'mongodb_locks_Database_acquireWaitCount_W{%(queriesSelector)s, rs_nm=~"$rs_nm"}', + rangeFunction: 'increase', + legendCustomTemplate: '{{instance}} - exclusive', + }, + }, + }, + + dbWaitCountIntentExclusiveByInstance: { + name: 'Database intent exclusive lock wait count by instance', + type: 'counter', + aggLevel: 'none', + description: 'Database intent exclusive lock wait count per instance.', + unit: 'none', + sources: { + prometheus: { + expr: 'mongodb_locks_Database_acquireWaitCount_w{%(queriesSelector)s, rs_nm=~"$rs_nm"}', + rangeFunction: 'increase', + legendCustomTemplate: '{{instance}} - intent exclusive', + }, + }, + }, + + dbWaitCountSharedByInstance: { + name: 'Database shared lock wait count by instance', + type: 'counter', + aggLevel: 'none', + description: 'Database shared lock wait count per instance.', + unit: 'none', + sources: { + prometheus: { + expr: 'mongodb_locks_Database_acquireWaitCount_R{%(queriesSelector)s, rs_nm=~"$rs_nm"}', + rangeFunction: 'increase', + legendCustomTemplate: '{{instance}} - shared', + }, + }, + }, + + dbWaitCountIntentSharedByInstance: { + name: 'Database intent shared lock wait count by instance', + type: 'counter', + aggLevel: 'none', + description: 'Database intent shared lock wait count per instance.', + unit: 'none', + 
sources: { + prometheus: { + expr: 'mongodb_locks_Database_acquireWaitCount_r{%(queriesSelector)s, rs_nm=~"$rs_nm"}', + rangeFunction: 'increase', + legendCustomTemplate: '{{instance}} - intent shared', + }, + }, + }, + + // Database lock acquisition time (by instance) + dbAcqTimeExclusiveByInstance: { + name: 'Database exclusive lock acquisition time by instance', + type: 'counter', + aggLevel: 'none', + description: 'Database exclusive lock acquisition time per instance.', + unit: 'µs', + sources: { + prometheus: { + expr: 'mongodb_locks_Database_timeAcquiringMicros_W{%(queriesSelector)s, rs_nm=~"$rs_nm"}', + rangeFunction: 'increase', + legendCustomTemplate: '{{instance}} - exclusive', + }, + }, + }, + + dbAcqTimeIntentExclusiveByInstance: { + name: 'Database intent exclusive lock acquisition time by instance', + type: 'counter', + aggLevel: 'none', + description: 'Database intent exclusive lock acquisition time per instance.', + unit: 'µs', + sources: { + prometheus: { + expr: 'mongodb_locks_Database_timeAcquiringMicros_w{%(queriesSelector)s, rs_nm=~"$rs_nm"}', + rangeFunction: 'increase', + legendCustomTemplate: '{{instance}} - intent exclusive', + }, + }, + }, + + dbAcqTimeSharedByInstance: { + name: 'Database shared lock acquisition time by instance', + type: 'counter', + aggLevel: 'none', + description: 'Database shared lock acquisition time per instance.', + unit: 'µs', + sources: { + prometheus: { + expr: 'mongodb_locks_Database_timeAcquiringMicros_R{%(queriesSelector)s, rs_nm=~"$rs_nm"}', + rangeFunction: 'increase', + legendCustomTemplate: '{{instance}} - shared', + }, + }, + }, + + dbAcqTimeIntentSharedByInstance: { + name: 'Database intent shared lock acquisition time by instance', + type: 'counter', + aggLevel: 'none', + description: 'Database intent shared lock acquisition time per instance.', + unit: 'µs', + sources: { + prometheus: { + expr: 'mongodb_locks_Database_timeAcquiringMicros_r{%(queriesSelector)s, rs_nm=~"$rs_nm"}', + rangeFunction: 
'increase', + legendCustomTemplate: '{{instance}} - intent shared', + }, + }, + }, + + // Collection lock deadlocks (by instance) + collDeadlockExclusiveByInstance: { + name: 'Collection exclusive lock deadlocks by instance', + type: 'counter', + aggLevel: 'none', + description: 'Collection exclusive lock deadlocks per instance.', + unit: 'none', + sources: { + prometheus: { + expr: 'mongodb_locks_Collection_deadlockCount_W{%(queriesSelector)s, rs_nm=~"$rs_nm"}', + rangeFunction: 'increase', + legendCustomTemplate: '{{instance}} - exclusive', + }, + }, + }, + + collDeadlockIntentExclusiveByInstance: { + name: 'Collection intent exclusive lock deadlocks by instance', + type: 'counter', + aggLevel: 'none', + description: 'Collection intent exclusive lock deadlocks per instance.', + unit: 'none', + sources: { + prometheus: { + expr: 'mongodb_locks_Collection_deadlockCount_w{%(queriesSelector)s, rs_nm=~"$rs_nm"}', + rangeFunction: 'increase', + legendCustomTemplate: '{{instance}} - intent exclusive', + }, + }, + }, + + collDeadlockSharedByInstance: { + name: 'Collection shared lock deadlocks by instance', + type: 'counter', + aggLevel: 'none', + description: 'Collection shared lock deadlocks per instance.', + unit: 'none', + sources: { + prometheus: { + expr: 'mongodb_locks_Collection_deadlockCount_R{%(queriesSelector)s, rs_nm=~"$rs_nm"}', + rangeFunction: 'increase', + legendCustomTemplate: '{{instance}} - shared', + }, + }, + }, + + collDeadlockIntentSharedByInstance: { + name: 'Collection intent shared lock deadlocks by instance', + type: 'counter', + aggLevel: 'none', + description: 'Collection intent shared lock deadlocks per instance.', + unit: 'none', + sources: { + prometheus: { + expr: 'mongodb_locks_Collection_deadlockCount_r{%(queriesSelector)s, rs_nm=~"$rs_nm"}', + rangeFunction: 'increase', + legendCustomTemplate: '{{instance}} - intent shared', + }, + }, + }, + + // Collection lock wait counts (by instance) + collWaitCountExclusiveByInstance: { + name: 
'Collection exclusive lock wait count by instance', + type: 'counter', + aggLevel: 'none', + description: 'Collection exclusive lock wait count per instance.', + unit: 'none', + sources: { + prometheus: { + expr: 'mongodb_locks_Collection_acquireWaitCount_W{%(queriesSelector)s, rs_nm=~"$rs_nm"}', + rangeFunction: 'increase', + legendCustomTemplate: '{{instance}} - exclusive', + }, + }, + }, + + collWaitCountIntentExclusiveByInstance: { + name: 'Collection intent exclusive lock wait count by instance', + type: 'counter', + aggLevel: 'none', + description: 'Collection intent exclusive lock wait count per instance.', + unit: 'none', + sources: { + prometheus: { + expr: 'mongodb_locks_Collection_acquireWaitCount_w{%(queriesSelector)s, rs_nm=~"$rs_nm"}', + rangeFunction: 'increase', + legendCustomTemplate: '{{instance}} - intent exclusive', + }, + }, + }, + + collWaitCountSharedByInstance: { + name: 'Collection shared lock wait count by instance', + type: 'counter', + aggLevel: 'none', + description: 'Collection shared lock wait count per instance.', + unit: 'none', + sources: { + prometheus: { + expr: 'mongodb_locks_Collection_acquireWaitCount_R{%(queriesSelector)s, rs_nm=~"$rs_nm"}', + rangeFunction: 'increase', + legendCustomTemplate: '{{instance}} - shared', + }, + }, + }, + + collWaitCountIntentSharedByInstance: { + name: 'Collection intent shared lock wait count by instance', + type: 'counter', + aggLevel: 'none', + description: 'Collection intent shared lock wait count per instance.', + unit: 'none', + sources: { + prometheus: { + expr: 'mongodb_locks_Collection_acquireWaitCount_r{%(queriesSelector)s, rs_nm=~"$rs_nm"}', + rangeFunction: 'increase', + legendCustomTemplate: '{{instance}} - intent shared', + }, + }, + }, + + // Collection lock acquisition time (by instance) + collAcqTimeExclusiveByInstance: { + name: 'Collection exclusive lock acquisition time by instance', + type: 'counter', + aggLevel: 'none', + description: 'Collection exclusive lock acquisition 
time per instance.', + unit: 'µs', + sources: { + prometheus: { + expr: 'mongodb_locks_Collection_timeAcquiringMicros_W{%(queriesSelector)s, rs_nm=~"$rs_nm"}', + rangeFunction: 'increase', + legendCustomTemplate: '{{instance}} - exclusive', + }, + }, + }, + + collAcqTimeIntentExclusiveByInstance: { + name: 'Collection intent exclusive lock acquisition time by instance', + type: 'counter', + aggLevel: 'none', + description: 'Collection intent exclusive lock acquisition time per instance.', + unit: 'µs', + sources: { + prometheus: { + expr: 'mongodb_locks_Collection_timeAcquiringMicros_w{%(queriesSelector)s, rs_nm=~"$rs_nm"}', + rangeFunction: 'increase', + legendCustomTemplate: '{{instance}} - intent exclusive', + }, + }, + }, + + collAcqTimeSharedByInstance: { + name: 'Collection shared lock acquisition time by instance', + type: 'counter', + aggLevel: 'none', + description: 'Collection shared lock acquisition time per instance.', + unit: 'µs', + sources: { + prometheus: { + expr: 'mongodb_locks_Collection_timeAcquiringMicros_R{%(queriesSelector)s, rs_nm=~"$rs_nm"}', + rangeFunction: 'increase', + legendCustomTemplate: '{{instance}} - shared', + }, + }, + }, + + collAcqTimeIntentSharedByInstance: { + name: 'Collection intent shared lock acquisition time by instance', + type: 'counter', + aggLevel: 'none', + description: 'Collection intent shared lock acquisition time per instance.', + unit: 'µs', + sources: { + prometheus: { + expr: 'mongodb_locks_Collection_timeAcquiringMicros_r{%(queriesSelector)s, rs_nm=~"$rs_nm"}', + rangeFunction: 'increase', + legendCustomTemplate: '{{instance}} - intent shared', + }, + }, + }, + }, + } diff --git a/mongodb-atlas-mixin/signals/performance.libsonnet b/mongodb-atlas-mixin/signals/performance.libsonnet new file mode 100644 index 000000000..7efb43c23 --- /dev/null +++ b/mongodb-atlas-mixin/signals/performance.libsonnet @@ -0,0 +1,738 @@ +function(this) + { + filteringSelector: this.filteringSelector, + groupLabels: 
this.groupLabels, + instanceLabels: this.instanceLabels, + enableLokiLogs: this.enableLokiLogs, + aggLevel: 'group', + aggFunction: 'sum', + signals: { + + // Connection signals (per instance: each sets aggLevel 'none', overriding the 'group' default above) + connectionsCurrent: { + name: 'Current connections', + type: 'gauge', + aggLevel: 'none', + description: 'Current number of incoming connections.', + unit: 'none', + sources: { + prometheus: { + expr: 'mongodb_connections_current{%(queriesSelector)s, rs_nm=~"$rs_nm"}', + legendCustomTemplate: '{{instance}}', + }, + }, + }, + + connectionsActive: { + name: 'Active connections', + type: 'gauge', + aggLevel: 'none', + description: 'Current number of connections with operations in progress.', + unit: 'none', + sources: { + prometheus: { + expr: 'mongodb_connections_active{%(queriesSelector)s, rs_nm=~"$rs_nm"}', + legendCustomTemplate: '{{instance}}', + }, + }, + }, + + // Database lock deadlocks (cluster-level) + dbDeadlockExclusive: { + name: 'Database exclusive lock deadlocks', + type: 'counter', + description: 'Database exclusive lock deadlocks.', + unit: 'none', + sources: { + prometheus: { + expr: 'mongodb_locks_Database_deadlockCount_W{%(queriesSelector)s, rs_nm=~"$rs_nm"}', + rangeFunction: 'increase', + legendCustomTemplate: '{{cl_name}} - exclusive', + }, + }, + }, + + dbDeadlockIntentExclusive: { + name: 'Database intent exclusive lock deadlocks', + type: 'counter', + description: 'Database intent exclusive lock deadlocks.', + unit: 'none', + sources: { + prometheus: { + expr: 'mongodb_locks_Database_deadlockCount_w{%(queriesSelector)s, rs_nm=~"$rs_nm"}', + rangeFunction: 'increase', + legendCustomTemplate: '{{cl_name}} - intent exclusive', + }, + }, + }, + + dbDeadlockShared: { + name: 'Database shared lock deadlocks', + type: 'counter', + description: 'Database shared lock deadlocks.', + unit: 'none', + sources: { + prometheus: { + expr: 'mongodb_locks_Database_deadlockCount_R{%(queriesSelector)s, rs_nm=~"$rs_nm"}', + rangeFunction: 'increase', + legendCustomTemplate: '{{cl_name}} -
shared', + }, + }, + }, + + dbDeadlockIntentShared: { + name: 'Database intent shared lock deadlocks', + type: 'counter', + description: 'Database intent shared lock deadlocks.', + unit: 'none', + sources: { + prometheus: { + expr: 'mongodb_locks_Database_deadlockCount_r{%(queriesSelector)s, rs_nm=~"$rs_nm"}', + rangeFunction: 'increase', + legendCustomTemplate: '{{cl_name}} - intent shared', + }, + }, + }, + + // Database lock deadlocks (by instance) + dbDeadlockExclusiveByInstance: { + name: 'Database exclusive lock deadlocks by instance', + type: 'counter', + aggLevel: 'none', + description: 'Database exclusive lock deadlocks per instance.', + unit: 'none', + sources: { + prometheus: { + expr: 'mongodb_locks_Database_deadlockCount_W{%(queriesSelector)s, rs_nm=~"$rs_nm"}', + rangeFunction: 'increase', + legendCustomTemplate: '{{instance}} - exclusive', + }, + }, + }, + + dbDeadlockIntentExclusiveByInstance: { + name: 'Database intent exclusive lock deadlocks by instance', + type: 'counter', + aggLevel: 'none', + description: 'Database intent exclusive lock deadlocks per instance.', + unit: 'none', + sources: { + prometheus: { + expr: 'mongodb_locks_Database_deadlockCount_w{%(queriesSelector)s, rs_nm=~"$rs_nm"}', + rangeFunction: 'increase', + legendCustomTemplate: '{{instance}} - intent exclusive', + }, + }, + }, + + dbDeadlockSharedByInstance: { + name: 'Database shared lock deadlocks by instance', + type: 'counter', + aggLevel: 'none', + description: 'Database shared lock deadlocks per instance.', + unit: 'none', + sources: { + prometheus: { + expr: 'mongodb_locks_Database_deadlockCount_R{%(queriesSelector)s, rs_nm=~"$rs_nm"}', + rangeFunction: 'increase', + legendCustomTemplate: '{{instance}} - shared', + }, + }, + }, + + dbDeadlockIntentSharedByInstance: { + name: 'Database intent shared lock deadlocks by instance', + type: 'counter', + aggLevel: 'none', + description: 'Database intent shared lock deadlocks per instance.', + unit: 'none', + sources: { + 
prometheus: { + expr: 'mongodb_locks_Database_deadlockCount_r{%(queriesSelector)s, rs_nm=~"$rs_nm"}', + rangeFunction: 'increase', + legendCustomTemplate: '{{instance}} - intent shared', + }, + }, + }, + + // Database lock wait counts (cluster-level) + dbWaitCountExclusive: { + name: 'Database exclusive lock wait count', + type: 'counter', + description: 'Database exclusive lock wait count.', + unit: 'none', + sources: { + prometheus: { + expr: 'mongodb_locks_Database_acquireWaitCount_W{%(queriesSelector)s, rs_nm=~"$rs_nm"}', + rangeFunction: 'increase', + legendCustomTemplate: '{{cl_name}} - exclusive', + }, + }, + }, + + dbWaitCountIntentExclusive: { + name: 'Database intent exclusive lock wait count', + type: 'counter', + description: 'Database intent exclusive lock wait count.', + unit: 'none', + sources: { + prometheus: { + expr: 'mongodb_locks_Database_acquireWaitCount_w{%(queriesSelector)s, rs_nm=~"$rs_nm"}', + rangeFunction: 'increase', + legendCustomTemplate: '{{cl_name}} - intent exclusive', + }, + }, + }, + + dbWaitCountShared: { + name: 'Database shared lock wait count', + type: 'counter', + description: 'Database shared lock wait count.', + unit: 'none', + sources: { + prometheus: { + expr: 'mongodb_locks_Database_acquireWaitCount_R{%(queriesSelector)s, rs_nm=~"$rs_nm"}', + rangeFunction: 'increase', + legendCustomTemplate: '{{cl_name}} - shared', + }, + }, + }, + + dbWaitCountIntentShared: { + name: 'Database intent shared lock wait count', + type: 'counter', + description: 'Database intent shared lock wait count.', + unit: 'none', + sources: { + prometheus: { + expr: 'mongodb_locks_Database_acquireWaitCount_r{%(queriesSelector)s, rs_nm=~"$rs_nm"}', + rangeFunction: 'increase', + legendCustomTemplate: '{{cl_name}} - intent shared', + }, + }, + }, + + // Database lock wait counts (by instance) + dbWaitCountExclusiveByInstance: { + name: 'Database exclusive lock wait count by instance', + type: 'counter', + aggLevel: 'none', + description: 'Database 
exclusive lock wait count per instance.', + unit: 'none', + sources: { + prometheus: { + expr: 'mongodb_locks_Database_acquireWaitCount_W{%(queriesSelector)s, rs_nm=~"$rs_nm"}', + rangeFunction: 'increase', + legendCustomTemplate: '{{instance}} - exclusive', + }, + }, + }, + + dbWaitCountIntentExclusiveByInstance: { + name: 'Database intent exclusive lock wait count by instance', + type: 'counter', + aggLevel: 'none', + description: 'Database intent exclusive lock wait count per instance.', + unit: 'none', + sources: { + prometheus: { + expr: 'mongodb_locks_Database_acquireWaitCount_w{%(queriesSelector)s, rs_nm=~"$rs_nm"}', + rangeFunction: 'increase', + legendCustomTemplate: '{{instance}} - intent exclusive', + }, + }, + }, + + dbWaitCountSharedByInstance: { + name: 'Database shared lock wait count by instance', + type: 'counter', + aggLevel: 'none', + description: 'Database shared lock wait count per instance.', + unit: 'none', + sources: { + prometheus: { + expr: 'mongodb_locks_Database_acquireWaitCount_R{%(queriesSelector)s, rs_nm=~"$rs_nm"}', + rangeFunction: 'increase', + legendCustomTemplate: '{{instance}} - shared', + }, + }, + }, + + dbWaitCountIntentSharedByInstance: { + name: 'Database intent shared lock wait count by instance', + type: 'counter', + aggLevel: 'none', + description: 'Database intent shared lock wait count per instance.', + unit: 'none', + sources: { + prometheus: { + expr: 'mongodb_locks_Database_acquireWaitCount_r{%(queriesSelector)s, rs_nm=~"$rs_nm"}', + rangeFunction: 'increase', + legendCustomTemplate: '{{instance}} - intent shared', + }, + }, + }, + + // Database lock acquisition time + dbAcqTimeExclusive: { + name: 'Database exclusive lock acquisition time', + type: 'counter', + aggLevel: 'none', + description: 'Database exclusive lock acquisition time.', + unit: 'µs', + sources: { + prometheus: { + expr: 'mongodb_locks_Database_timeAcquiringMicros_W{%(queriesSelector)s, rs_nm=~"$rs_nm"}', + rangeFunction: 'increase', + 
legendCustomTemplate: '{{instance}} - exclusive', + }, + }, + }, + + dbAcqTimeIntentExclusive: { + name: 'Database intent exclusive lock acquisition time', + type: 'counter', + aggLevel: 'none', + description: 'Database intent exclusive lock acquisition time.', + unit: 'µs', + sources: { + prometheus: { + expr: 'mongodb_locks_Database_timeAcquiringMicros_w{%(queriesSelector)s, rs_nm=~"$rs_nm"}', + rangeFunction: 'increase', + legendCustomTemplate: '{{instance}} - intent exclusive', + }, + }, + }, + + dbAcqTimeShared: { + name: 'Database shared lock acquisition time', + type: 'counter', + aggLevel: 'none', + description: 'Database shared lock acquisition time.', + unit: 'µs', + sources: { + prometheus: { + expr: 'mongodb_locks_Database_timeAcquiringMicros_R{%(queriesSelector)s, rs_nm=~"$rs_nm"}', + rangeFunction: 'increase', + legendCustomTemplate: '{{instance}} - shared', + }, + }, + }, + + dbAcqTimeIntentShared: { + name: 'Database intent shared lock acquisition time', + type: 'counter', + aggLevel: 'none', + description: 'Database intent shared lock acquisition time.', + unit: 'µs', + sources: { + prometheus: { + expr: 'mongodb_locks_Database_timeAcquiringMicros_r{%(queriesSelector)s, rs_nm=~"$rs_nm"}', + rangeFunction: 'increase', + legendCustomTemplate: '{{instance}} - intent shared', + }, + }, + }, + + // Collection lock deadlocks + collDeadlockExclusive: { + name: 'Collection exclusive lock deadlocks', + type: 'counter', + aggLevel: 'none', + description: 'Collection exclusive lock deadlocks.', + unit: 'none', + sources: { + prometheus: { + expr: 'mongodb_locks_Collection_deadlockCount_W{%(queriesSelector)s, rs_nm=~"$rs_nm"}', + rangeFunction: 'increase', + legendCustomTemplate: '{{instance}} - exclusive', + }, + }, + }, + + collDeadlockIntentExclusive: { + name: 'Collection intent exclusive lock deadlocks', + type: 'counter', + aggLevel: 'none', + description: 'Collection intent exclusive lock deadlocks.', + unit: 'none', + sources: { + prometheus: { + expr: 
'mongodb_locks_Collection_deadlockCount_w{%(queriesSelector)s, rs_nm=~"$rs_nm"}', + rangeFunction: 'increase', + legendCustomTemplate: '{{instance}} - intent exclusive', + }, + }, + }, + + collDeadlockShared: { + name: 'Collection shared lock deadlocks', + type: 'counter', + aggLevel: 'none', + description: 'Collection shared lock deadlocks.', + unit: 'none', + sources: { + prometheus: { + expr: 'mongodb_locks_Collection_deadlockCount_R{%(queriesSelector)s, rs_nm=~"$rs_nm"}', + rangeFunction: 'increase', + legendCustomTemplate: '{{instance}} - shared', + }, + }, + }, + + collDeadlockIntentShared: { + name: 'Collection intent shared lock deadlocks', + type: 'counter', + aggLevel: 'none', + description: 'Collection intent shared lock deadlocks.', + unit: 'none', + sources: { + prometheus: { + expr: 'mongodb_locks_Collection_deadlockCount_r{%(queriesSelector)s, rs_nm=~"$rs_nm"}', + rangeFunction: 'increase', + legendCustomTemplate: '{{instance}} - intent shared', + }, + }, + }, + + // Collection lock wait counts + collWaitCountExclusive: { + name: 'Collection exclusive lock wait count', + type: 'counter', + aggLevel: 'none', + description: 'Collection exclusive lock wait count.', + unit: 'none', + sources: { + prometheus: { + expr: 'mongodb_locks_Collection_acquireWaitCount_W{%(queriesSelector)s, rs_nm=~"$rs_nm"}', + rangeFunction: 'increase', + legendCustomTemplate: '{{instance}} - exclusive', + }, + }, + }, + + collWaitCountIntentExclusive: { + name: 'Collection intent exclusive lock wait count', + type: 'counter', + aggLevel: 'none', + description: 'Collection intent exclusive lock wait count.', + unit: 'none', + sources: { + prometheus: { + expr: 'mongodb_locks_Collection_acquireWaitCount_w{%(queriesSelector)s, rs_nm=~"$rs_nm"}', + rangeFunction: 'increase', + legendCustomTemplate: '{{instance}} - intent exclusive', + }, + }, + }, + + collWaitCountShared: { + name: 'Collection shared lock wait count', + type: 'counter', + aggLevel: 'none', + description: 
'Collection shared lock wait count.', + unit: 'none', + sources: { + prometheus: { + expr: 'mongodb_locks_Collection_acquireWaitCount_R{%(queriesSelector)s, rs_nm=~"$rs_nm"}', + rangeFunction: 'increase', + legendCustomTemplate: '{{instance}} - shared', + }, + }, + }, + + collWaitCountIntentShared: { + name: 'Collection intent shared lock wait count', + type: 'counter', + aggLevel: 'none', + description: 'Collection intent shared lock wait count.', + unit: 'none', + sources: { + prometheus: { + expr: 'mongodb_locks_Collection_acquireWaitCount_r{%(queriesSelector)s, rs_nm=~"$rs_nm"}', + rangeFunction: 'increase', + legendCustomTemplate: '{{instance}} - intent shared', + }, + }, + }, + + // Collection lock acquisition time + collAcqTimeExclusive: { + name: 'Collection exclusive lock acquisition time', + type: 'counter', + aggLevel: 'none', + description: 'Collection exclusive lock acquisition time.', + unit: 'µs', + sources: { + prometheus: { + expr: 'mongodb_locks_Collection_timeAcquiringMicros_W{%(queriesSelector)s, rs_nm=~"$rs_nm"}', + rangeFunction: 'increase', + legendCustomTemplate: '{{instance}} - exclusive', + }, + }, + }, + + collAcqTimeIntentExclusive: { + name: 'Collection intent exclusive lock acquisition time', + type: 'counter', + aggLevel: 'none', + description: 'Collection intent exclusive lock acquisition time.', + unit: 'µs', + sources: { + prometheus: { + expr: 'mongodb_locks_Collection_timeAcquiringMicros_w{%(queriesSelector)s, rs_nm=~"$rs_nm"}', + rangeFunction: 'increase', + legendCustomTemplate: '{{instance}} - intent exclusive', + }, + }, + }, + + collAcqTimeShared: { + name: 'Collection shared lock acquisition time', + type: 'counter', + aggLevel: 'none', + description: 'Collection shared lock acquisition time.', + unit: 'µs', + sources: { + prometheus: { + expr: 'mongodb_locks_Collection_timeAcquiringMicros_R{%(queriesSelector)s, rs_nm=~"$rs_nm"}', + rangeFunction: 'increase', + legendCustomTemplate: '{{instance}} - shared', + }, + }, + }, + 
+ collAcqTimeIntentShared: { + name: 'Collection intent shared lock acquisition time', + type: 'counter', + aggLevel: 'none', + description: 'Collection intent shared lock acquisition time.', + unit: 'µs', + sources: { + prometheus: { + expr: 'mongodb_locks_Collection_timeAcquiringMicros_r{%(queriesSelector)s, rs_nm=~"$rs_nm"}', + rangeFunction: 'increase', + legendCustomTemplate: '{{instance}} - intent shared', + }, + }, + }, + + // Disk space signals + diskSpaceFree: { + name: 'Disk space free', + type: 'gauge', + aggLevel: 'none', + description: 'Free disk space on the node.', + unit: 'decbytes', + sources: { + prometheus: { + expr: 'hardware_disk_metrics_disk_space_free_bytes{%(queriesSelector)s}', + legendCustomTemplate: '{{instance}} - free', + }, + }, + }, + + diskSpaceUsed: { + name: 'Disk space used', + type: 'gauge', + aggLevel: 'none', + description: 'Used disk space on the node.', + unit: 'decbytes', + sources: { + prometheus: { + expr: 'hardware_disk_metrics_disk_space_used_bytes{%(queriesSelector)s}', + legendCustomTemplate: '{{instance}} - used', + }, + }, + }, + + diskSpaceUtilizationByInstance: { + name: 'Disk space utilization by instance', + type: 'raw', + aggLevel: 'none', + description: 'Disk space utilization per instance.', + unit: 'percentunit', + sources: { + prometheus: { + expr: '(hardware_disk_metrics_disk_space_used_bytes{%(queriesSelector)s}) / clamp_min((hardware_disk_metrics_disk_space_free_bytes{%(queriesSelector)s}) + (hardware_disk_metrics_disk_space_used_bytes{%(queriesSelector)s}), 1)', + legendCustomTemplate: '{{instance}}', + }, + }, + }, + + // Memory signals + memoryResidentByInstance: { + name: 'Memory resident by instance', + type: 'gauge', + aggLevel: 'none', + description: 'Resident memory (RAM) usage per instance.', + unit: 'mbytes', + sources: { + prometheus: { + expr: 'mongodb_mem_resident{%(queriesSelector)s, rs_nm=~"$rs_nm"}', + legendCustomTemplate: '{{instance}} - RAM', + }, + }, + }, + + memoryVirtualByInstance: 
{ + name: 'Memory virtual by instance', + type: 'gauge', + aggLevel: 'none', + description: 'Virtual memory usage per instance.', + unit: 'mbytes', + sources: { + prometheus: { + expr: 'mongodb_mem_virtual{%(queriesSelector)s, rs_nm=~"$rs_nm"}', + legendCustomTemplate: '{{instance}} - virtual', + }, + }, + }, + + // CPU interrupt service time by instance + cpuIrqTimeByInstance: { + name: 'CPU interrupt service time by instance', + type: 'counter', + aggLevel: 'none', + description: 'CPU time spent servicing interrupts per instance.', + unit: 'ms', + sources: { + prometheus: { + expr: 'hardware_system_cpu_irq_milliseconds{%(queriesSelector)s}', + rangeFunction: 'increase', + legendCustomTemplate: '{{instance}}', + }, + }, + }, + + // Network signals by instance + networkRequestsByInstance: { + name: 'Network requests by instance', + type: 'counter', + aggLevel: 'none', + description: 'Number of network requests per instance.', + unit: 'reqps', + sources: { + prometheus: { + expr: 'mongodb_network_numRequests{%(queriesSelector)s, rs_nm=~"$rs_nm"}', + legendCustomTemplate: '{{instance}}', + }, + }, + }, + + networkSlowDNSByInstance: { + name: 'Slow DNS operations by instance', + type: 'counter', + aggLevel: 'none', + description: 'Number of slow DNS operations per instance.', + unit: 'reqps', + sources: { + prometheus: { + expr: 'mongodb_network_numSlowDNSOperations{%(queriesSelector)s, rs_nm=~"$rs_nm"}', + legendCustomTemplate: '{{instance}} - DNS', + }, + }, + }, + + networkSlowSSLByInstance: { + name: 'Slow SSL operations by instance', + type: 'counter', + aggLevel: 'none', + description: 'Number of slow SSL operations per instance.', + unit: 'reqps', + sources: { + prometheus: { + expr: 'mongodb_network_numSlowSSLOperations{%(queriesSelector)s, rs_nm=~"$rs_nm"}', + legendCustomTemplate: '{{instance}} - SSL', + }, + }, + }, + + networkBytesInByInstance: { + name: 'Network bytes received by instance', + type: 'counter', + aggLevel: 'none', + description: 'Network 
bytes received per instance.',
+        unit: 'Bps',
+        sources: {
+          prometheus: {
+            expr: 'mongodb_network_bytesIn{%(queriesSelector)s, rs_nm=~"$rs_nm"}',
+            legendCustomTemplate: '{{instance}} - received',
+          },
+        },
+      },
+
+      networkBytesOutByInstance: {
+        name: 'Network bytes sent by instance',
+        type: 'counter',
+        aggLevel: 'none',
+        description: 'Network bytes sent per instance.',
+        unit: 'Bps',
+        sources: {
+          prometheus: {
+            expr: 'mongodb_network_bytesOut{%(queriesSelector)s, rs_nm=~"$rs_nm"}',
+            legendCustomTemplate: '{{instance}} - sent',
+          },
+        },
+      },
+
+      // Hardware I/O by instance (op counts are cumulative counters, charted as per-second rates)
+      diskReadCountByInstance: {
+        name: 'Disk read operations by instance',
+        type: 'counter',  // was 'raw' with a hand-written rate() and hard-coded selector
+        aggLevel: 'none',
+        description: 'Number of disk read operations per instance.',
+        unit: 'iops',
+        sources: {
+          prometheus: {
+            expr: 'hardware_disk_metrics_read_count{%(queriesSelector)s}',  // shared selector keeps the mixin filter
+            legendCustomTemplate: '{{instance}} - reads',
+          },
+        },
+      },
+
+      diskWriteCountByInstance: {
+        name: 'Disk write operations by instance',
+        type: 'counter',  // was 'raw' with no rate(), which plotted the cumulative count as iops
+        aggLevel: 'none',
+        description: 'Number of disk write operations per instance.',
+        unit: 'iops',
+        sources: {
+          prometheus: {
+            expr: 'hardware_disk_metrics_write_count{%(queriesSelector)s}',
+            legendCustomTemplate: '{{instance}} - writes',
+          },
+        },
+      },
+
+      diskReadTimeByInstance: {
+        name: 'Disk read I/O time by instance',
+        type: 'counter',
+        aggLevel: 'none',
+        description: 'Time spent on read I/O operations per instance.',
+        unit: 'ms',
+        sources: {
+          prometheus: {
+            expr: 'hardware_disk_metrics_read_time_milliseconds{%(queriesSelector)s}',
+            rangeFunction: 'increase',
+            legendCustomTemplate: '{{instance}} - read',
+          },
+        },
+      },
+
+      diskWriteTimeByInstance: {
+        name: 'Disk write I/O time by instance',
+        type: 'counter',
+        aggLevel: 'none',
+        description: 'Time spent on write I/O operations per instance.',
+        unit: 'ms',
+        sources: {
+          prometheus: {
+            expr:
'hardware_disk_metrics_write_time_milliseconds{%(queriesSelector)s}',
+            rangeFunction: 'increase',
+            legendCustomTemplate: '{{instance}} - write',
+          },
+        },
+      },
+    },
+  }
diff --git a/mongodb-atlas-mixin/signals/sharding.libsonnet b/mongodb-atlas-mixin/signals/sharding.libsonnet
new file mode 100644
index 000000000..5b6242795
--- /dev/null
+++ b/mongodb-atlas-mixin/signals/sharding.libsonnet
@@ -0,0 +1,458 @@
+function(this)
+  {
+    filteringSelector: this.filteringSelector,
+    groupLabels: this.groupLabels,
+    instanceLabels: this.instanceLabels,
+    enableLokiLogs: this.enableLokiLogs,
+    aggLevel: 'none',
+    aggFunction: 'sum',
+    signals: {
+
+      // General sharding statistics (rangeFunction is set per source, as in the other signal files)
+      staleConfigErrors: {
+        name: 'Stale config errors',
+        type: 'counter',
+        description: 'Stale config errors triggering metadata refresh.',
+        unit: 'none',
+        sources: {
+          prometheus: {
+            expr: 'mongodb_shardingStatistics_countStaleConfigErrors{%(queriesSelector)s, rs_nm=~"$rs_nm"}',
+            rangeFunction: 'increase',
+            legendCustomTemplate: '{{instance}}',
+          },
+        },
+      },
+
+      moveChunksStarted: {
+        name: 'Chunk migrations started as recipient',
+        type: 'counter',
+        description: 'Chunk migrations started as recipient.',
+        unit: 'none',
+        sources: {
+          prometheus: {
+            expr: 'mongodb_shardingStatistics_countRecipientMoveChunkStarted{%(queriesSelector)s, rs_nm=~"$rs_nm"}',
+            rangeFunction: 'increase',
+            legendCustomTemplate: '{{instance}}',
+          },
+        },
+      },
+
+      docsClonedDonor: {
+        name: 'Documents cloned on donor',
+        type: 'counter',
+        description: 'Documents cloned when acting as donor.',
+        unit: 'none',
+        sources: {
+          prometheus: {
+            expr: 'mongodb_shardingStatistics_countDocsClonedOnDonor{%(queriesSelector)s, rs_nm=~"$rs_nm"}',
+            rangeFunction: 'increase',
+            legendCustomTemplate: '{{instance}} - donor',
+          },
+        },
+      },
+
+      docsClonedRecipient: {
+        name: 'Documents cloned on recipient',
+        type: 'counter',
+        description: 'Documents cloned when acting as recipient.',
+        unit: 'none',
+        sources: {
+          prometheus: {
+            expr: 'mongodb_shardingStatistics_countDocsClonedOnRecipient{%(queriesSelector)s, rs_nm=~"$rs_nm"}',
+            rangeFunction: 'increase',
+            legendCustomTemplate: '{{instance}} - recipient',
+          },
+        },
+      },
+
+      criticalSectionTime: {
+        name: 'Critical section time',
+        type: 'counter',
+        description: 'Time in critical section during chunk migration.',
+        unit: 'ms',
+        sources: {
+          prometheus: {
+            expr: 'mongodb_shardingStatistics_totalCriticalSectionTimeMillis{%(queriesSelector)s, rs_nm=~"$rs_nm"}',
+            rangeFunction: 'increase',
+            legendCustomTemplate: '{{instance}}',
+          },
+        },
+      },
+
+      // Catalog cache refreshes
+      catalogCacheIncrementalRefreshes: {
+        name: 'Incremental catalog cache refreshes',
+        type: 'counter',
+        description: 'Incremental catalog cache refreshes started.',
+        unit: 'none',
+        sources: {
+          prometheus: {
+            expr: 'mongodb_shardingStatistics_catalogCache_countIncrementalRefreshesStarted{%(queriesSelector)s, rs_nm=~"$rs_nm"}',
+            rangeFunction: 'increase',
+            legendCustomTemplate: '{{instance}} - incremental',
+          },
+        },
+      },
+
+      catalogCacheFullRefreshes: {
+        name: 'Full catalog cache refreshes',
+        type: 'counter',
+        description: 'Full catalog cache refreshes started.',
+        unit: 'none',
+        sources: {
+          prometheus: {
+            expr: 'mongodb_shardingStatistics_catalogCache_countFullRefreshesStarted{%(queriesSelector)s, rs_nm=~"$rs_nm"}',
+            rangeFunction: 'increase',
+            legendCustomTemplate: '{{instance}} - full',
+          },
+        },
+      },
+
+      catalogCacheFailedRefreshes: {
+        name: 'Failed catalog cache refreshes',
+        type: 'counter',
+        description: 'Failed catalog cache refreshes.',
+        unit: 'none',
+        sources: {
+          prometheus: {
+            expr: 'mongodb_shardingStatistics_catalogCache_countFailedRefreshes{%(queriesSelector)s, rs_nm=~"$rs_nm"}',
+            rangeFunction: 'increase',
+            legendCustomTemplate: '{{instance}}',
+          },
+        },
+      },
+
+      catalogCacheStaleConfigErrors: {
+        name: 'Catalog cache stale config errors',
+        type: 'counter',
+        description: 'Stale config errors in catalog cache.',
+        unit: 'none',
+        sources: {
+          prometheus: {
+            expr: 'mongodb_shardingStatistics_catalogCache_countStaleConfigErrors{%(queriesSelector)s, rs_nm=~"$rs_nm"}',
+            rangeFunction: 'increase',
+            legendCustomTemplate: '{{instance}}',
+          },
+        },
+      },
+
+      catalogCacheDatabaseEntries: {
+        name: 'Database entries in catalog cache',
+        type: 'counter',  // NOTE(review): numDatabaseEntries looks like a current-size gauge; confirm 'counter' is intended
+        description: 'Database entries in catalog cache.',
+        unit: 'none',
+        sources: {
+          prometheus: {
+            expr: 'mongodb_shardingStatistics_catalogCache_numDatabaseEntries{%(queriesSelector)s, rs_nm=~"$rs_nm"}',
+            rangeFunction: 'increase',
+            legendCustomTemplate: '{{instance}} - database',
+          },
+        },
+      },
+
+      catalogCacheCollectionEntries: {
+        name: 'Collection entries in catalog cache',
+        type: 'counter',  // NOTE(review): numCollectionEntries looks like a current-size gauge; confirm 'counter' is intended
+        description: 'Collection entries in catalog cache.',
+        unit: 'none',
+        sources: {
+          prometheus: {
+            expr: 'mongodb_shardingStatistics_catalogCache_numCollectionEntries{%(queriesSelector)s, rs_nm=~"$rs_nm"}',
+            rangeFunction: 'increase',
+            legendCustomTemplate: '{{instance}} - collection',
+          },
+        },
+      },
+
+      catalogCacheRefreshWaitTime: {
+        name: 'Catalog cache refresh wait time',
+        type: 'counter',
+        description: 'Total time waiting for catalog cache refresh.',
+        unit: 'µs',
+        sources: {
+          prometheus: {
+            expr: 'mongodb_shardingStatistics_catalogCache_totalRefreshWaitTimeMicros{%(queriesSelector)s, rs_nm=~"$rs_nm"}',
+            rangeFunction: 'increase',
+            legendCustomTemplate: '{{instance}}',
+          },
+        },
+      },
+
+      catalogCacheOpsBlocked: {
+        name: 'Operations blocked by catalog cache refresh',
+        type: 'counter',
+        description: 'Operations blocked by catalog cache refresh.',
+        unit: 'ops',
+        sources: {
+          prometheus: {
+            expr: 'mongodb_shardingStatistics_catalogCache_operationsBlockedByRefresh_countAllOperations{%(queriesSelector)s, rs_nm=~"$rs_nm"}',
+            legendCustomTemplate: '{{instance}}',
+          },
+        },
+      },
+
+      // Targeting - allShards
+      targetingFindAllShards: {
+        name: 'Find operations targeting all shards',
+        type: 'counter',
+        description: 'Find operations
targeting all shards.', + unit: 'ops', + sources: { + prometheus: { + expr: 'mongodb_shardingStatistics_numHostsTargeted_find_allShards{%(queriesSelector)s, rs_nm=~"$rs_nm"}', + legendCustomTemplate: '{{instance}} - find', + }, + }, + }, + + targetingInsertAllShards: { + name: 'Insert operations targeting all shards', + type: 'counter', + description: 'Insert operations targeting all shards.', + unit: 'ops', + sources: { + prometheus: { + expr: 'mongodb_shardingStatistics_numHostsTargeted_insert_allShards{%(queriesSelector)s, rs_nm=~"$rs_nm"}', + legendCustomTemplate: '{{instance}} - insert', + }, + }, + }, + + targetingUpdateAllShards: { + name: 'Update operations targeting all shards', + type: 'counter', + description: 'Update operations targeting all shards.', + unit: 'ops', + sources: { + prometheus: { + expr: 'mongodb_shardingStatistics_numHostsTargeted_update_allShards{%(queriesSelector)s, rs_nm=~"$rs_nm"}', + legendCustomTemplate: '{{instance}} - update', + }, + }, + }, + + targetingDeleteAllShards: { + name: 'Delete operations targeting all shards', + type: 'counter', + description: 'Delete operations targeting all shards.', + unit: 'ops', + sources: { + prometheus: { + expr: 'mongodb_shardingStatistics_numHostsTargeted_delete_allShards{%(queriesSelector)s, rs_nm=~"$rs_nm"}', + legendCustomTemplate: '{{instance}} - delete', + }, + }, + }, + + targetingAggregateAllShards: { + name: 'Aggregate operations targeting all shards', + type: 'counter', + description: 'Aggregate operations targeting all shards.', + unit: 'ops', + sources: { + prometheus: { + expr: 'mongodb_shardingStatistics_numHostsTargeted_aggregate_allShards{%(queriesSelector)s, rs_nm=~"$rs_nm"}', + legendCustomTemplate: '{{instance}} - aggregate', + }, + }, + }, + + // Targeting - manyShards + targetingFindManyShards: { + name: 'Find operations targeting many shards', + type: 'counter', + description: 'Find operations targeting many shards.', + unit: 'ops', + sources: { + prometheus: { + expr: 
'mongodb_shardingStatistics_numHostsTargeted_find_manyShards{%(queriesSelector)s, rs_nm=~"$rs_nm"}', + legendCustomTemplate: '{{instance}} - find', + }, + }, + }, + + targetingInsertManyShards: { + name: 'Insert operations targeting many shards', + type: 'counter', + description: 'Insert operations targeting many shards.', + unit: 'ops', + sources: { + prometheus: { + expr: 'mongodb_shardingStatistics_numHostsTargeted_insert_manyShards{%(queriesSelector)s, rs_nm=~"$rs_nm"}', + legendCustomTemplate: '{{instance}} - insert', + }, + }, + }, + + targetingUpdateManyShards: { + name: 'Update operations targeting many shards', + type: 'counter', + description: 'Update operations targeting many shards.', + unit: 'ops', + sources: { + prometheus: { + expr: 'mongodb_shardingStatistics_numHostsTargeted_update_manyShards{%(queriesSelector)s, rs_nm=~"$rs_nm"}', + legendCustomTemplate: '{{instance}} - update', + }, + }, + }, + + targetingDeleteManyShards: { + name: 'Delete operations targeting many shards', + type: 'counter', + description: 'Delete operations targeting many shards.', + unit: 'ops', + sources: { + prometheus: { + expr: 'mongodb_shardingStatistics_numHostsTargeted_delete_manyShards{%(queriesSelector)s, rs_nm=~"$rs_nm"}', + legendCustomTemplate: '{{instance}} - delete', + }, + }, + }, + + targetingAggregateManyShards: { + name: 'Aggregate operations targeting many shards', + type: 'counter', + description: 'Aggregate operations targeting many shards.', + unit: 'ops', + sources: { + prometheus: { + expr: 'mongodb_shardingStatistics_numHostsTargeted_aggregate_manyShards{%(queriesSelector)s, rs_nm=~"$rs_nm"}', + legendCustomTemplate: '{{instance}} - aggregate', + }, + }, + }, + + // Targeting - oneShard + targetingFindOneShard: { + name: 'Find operations targeting one shard', + type: 'counter', + description: 'Find operations targeting one shard.', + unit: 'ops', + sources: { + prometheus: { + expr: 
'mongodb_shardingStatistics_numHostsTargeted_find_oneShard{%(queriesSelector)s, rs_nm=~"$rs_nm"}', + legendCustomTemplate: '{{instance}} - find', + }, + }, + }, + + targetingInsertOneShard: { + name: 'Insert operations targeting one shard', + type: 'counter', + description: 'Insert operations targeting one shard.', + unit: 'ops', + sources: { + prometheus: { + expr: 'mongodb_shardingStatistics_numHostsTargeted_insert_oneShard{%(queriesSelector)s, rs_nm=~"$rs_nm"}', + legendCustomTemplate: '{{instance}} - insert', + }, + }, + }, + + targetingUpdateOneShard: { + name: 'Update operations targeting one shard', + type: 'counter', + description: 'Update operations targeting one shard.', + unit: 'ops', + sources: { + prometheus: { + expr: 'mongodb_shardingStatistics_numHostsTargeted_update_oneShard{%(queriesSelector)s, rs_nm=~"$rs_nm"}', + legendCustomTemplate: '{{instance}} - update', + }, + }, + }, + + targetingDeleteOneShard: { + name: 'Delete operations targeting one shard', + type: 'counter', + description: 'Delete operations targeting one shard.', + unit: 'ops', + sources: { + prometheus: { + expr: 'mongodb_shardingStatistics_numHostsTargeted_delete_oneShard{%(queriesSelector)s, rs_nm=~"$rs_nm"}', + legendCustomTemplate: '{{instance}} - delete', + }, + }, + }, + + targetingAggregateOneShard: { + name: 'Aggregate operations targeting one shard', + type: 'counter', + description: 'Aggregate operations targeting one shard.', + unit: 'ops', + sources: { + prometheus: { + expr: 'mongodb_shardingStatistics_numHostsTargeted_aggregate_oneShard{%(queriesSelector)s, rs_nm=~"$rs_nm"}', + legendCustomTemplate: '{{instance}} - aggregate', + }, + }, + }, + + // Targeting - unsharded + targetingFindUnsharded: { + name: 'Find operations on unsharded collections', + type: 'counter', + description: 'Find operations on unsharded collections.', + unit: 'ops', + sources: { + prometheus: { + expr: 'mongodb_shardingStatistics_numHostsTargeted_find_unsharded{%(queriesSelector)s, 
rs_nm=~"$rs_nm"}', + legendCustomTemplate: '{{instance}} - find', + }, + }, + }, + + targetingInsertUnsharded: { + name: 'Insert operations on unsharded collections', + type: 'counter', + description: 'Insert operations on unsharded collections.', + unit: 'ops', + sources: { + prometheus: { + expr: 'mongodb_shardingStatistics_numHostsTargeted_insert_unsharded{%(queriesSelector)s, rs_nm=~"$rs_nm"}', + legendCustomTemplate: '{{instance}} - insert', + }, + }, + }, + + targetingUpdateUnsharded: { + name: 'Update operations on unsharded collections', + type: 'counter', + description: 'Update operations on unsharded collections.', + unit: 'ops', + sources: { + prometheus: { + expr: 'mongodb_shardingStatistics_numHostsTargeted_update_unsharded{%(queriesSelector)s, rs_nm=~"$rs_nm"}', + legendCustomTemplate: '{{instance}} - update', + }, + }, + }, + + targetingDeleteUnsharded: { + name: 'Delete operations on unsharded collections', + type: 'counter', + description: 'Delete operations on unsharded collections.', + unit: 'ops', + sources: { + prometheus: { + expr: 'mongodb_shardingStatistics_numHostsTargeted_delete_unsharded{%(queriesSelector)s, rs_nm=~"$rs_nm"}', + legendCustomTemplate: '{{instance}} - delete', + }, + }, + }, + + targetingAggregateUnsharded: { + name: 'Aggregate operations on unsharded collections', + type: 'counter', + description: 'Aggregate operations on unsharded collections.', + unit: 'ops', + sources: { + prometheus: { + expr: 'mongodb_shardingStatistics_numHostsTargeted_aggregate_unsharded{%(queriesSelector)s, rs_nm=~"$rs_nm"}', + legendCustomTemplate: '{{instance}} - aggregate', + }, + }, + }, + }, + } From e63ac4d7abc3540e927ef5631f5d4f5c19f0df5d Mon Sep 17 00:00:00 2001 From: schmikei Date: Tue, 18 Nov 2025 13:57:30 -0500 Subject: [PATCH 2/2] testing fixes/aggregate average catch-up operations --- mongodb-atlas-mixin/dashboards.libsonnet | 2 +- .../mongodb-atlas-cluster-overview.json | 467 +++++++-------- .../mongodb-atlas-elections-overview.json | 
14 +- .../mongodb-atlas-operations-overview.json | 93 ++- .../mongodb-atlas-performance-overview.json | 75 ++- .../mongodb-atlas-sharding-overview.json | 12 +- mongodb-atlas-mixin/links.libsonnet | 21 +- mongodb-atlas-mixin/mixin.libsonnet | 3 + mongodb-atlas-mixin/panels.libsonnet | 565 +++++------------- mongodb-atlas-mixin/rows.libsonnet | 46 +- mongodb-atlas-mixin/signals/cluster.libsonnet | 14 +- .../signals/elections.libsonnet | 3 +- .../signals/operations.libsonnet | 229 +------ 13 files changed, 517 insertions(+), 1027 deletions(-) diff --git a/mongodb-atlas-mixin/dashboards.libsonnet b/mongodb-atlas-mixin/dashboards.libsonnet index 8ed8fc38f..d05a88df4 100644 --- a/mongodb-atlas-mixin/dashboards.libsonnet +++ b/mongodb-atlas-mixin/dashboards.libsonnet @@ -84,7 +84,7 @@ local g = import './g.libsonnet'; g.util.panel.resolveCollapsedFlagOnRows( g.util.grid.wrapPanels( [ - this.grafana.rows.operationsCountersInstanceRow, + this.grafana.rows.operationsRow, this.grafana.rows.operationsConnectionsRow, this.grafana.rows.operationsReadWriteRow, this.grafana.rows.operationsLocksRow, diff --git a/mongodb-atlas-mixin/dashboards_out/mongodb-atlas-cluster-overview.json b/mongodb-atlas-mixin/dashboards_out/mongodb-atlas-cluster-overview.json index 8564b3790..d930f2d85 100644 --- a/mongodb-atlas-mixin/dashboards_out/mongodb-atlas-cluster-overview.json +++ b/mongodb-atlas-mixin/dashboards_out/mongodb-atlas-cluster-overview.json @@ -16,6 +16,16 @@ "type": "link", "url": "/d/mongodb-atlas-operations-overview" }, + { + "asDropdown": true, + "includeVars": true, + "keepTime": true, + "tags": [ + "mongodb-atlas-mixin" + ], + "title": "All dashboards", + "type": "dashboards" + }, { "keepTime": true, "title": "MongoDB Atlas performance overview", @@ -45,36 +55,16 @@ }, { "datasource": { - "type": "datasource", - "uid": "-- Mixed --" + "type": "prometheus", + "uid": "${datasource}" }, "description": "An inventory table for shard nodes in the environment.", "fieldConfig": { - 
"defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [ - { - "options": { - "1": { - "index": 0, - "text": "Primary" - }, - "2": { - "index": 1, - "text": "Secondary" - } - }, - "type": "value" - } - ] - }, "overrides": [ { "matcher": { "id": "byName", - "options": "cl_role" + "options": "Role" }, "properties": [ { @@ -86,19 +76,45 @@ { "matcher": { "id": "byName", - "options": "rs_state" + "options": "State" }, "properties": [ { "id": "custom.width", "value": 100 + }, + { + "id": "custom.cellOptions", + "value": { + "type": "color-text" + } + }, + { + "id": "mappings", + "value": [ + { + "options": { + "1": { + "color": "green", + "index": 0, + "text": "Primary" + }, + "2": { + "color": "yellow", + "index": 1, + "text": "Secondary" + } + }, + "type": "value" + } + ] } ] }, { "matcher": { "id": "byName", - "options": "rs_nm" + "options": "Replica set" }, "properties": [ { @@ -110,7 +126,7 @@ { "matcher": { "id": "byName", - "options": "cl_name" + "options": "Cluster" }, "properties": [ { @@ -122,7 +138,7 @@ { "matcher": { "id": "byName", - "options": "group_id" + "options": "Group" }, "properties": [ { @@ -130,40 +146,6 @@ "value": 300 } ] - }, - { - "matcher": { - "id": "byName", - "options": "State" - }, - "properties": [ - { - "id": "custom.cellOptions", - "value": { - "type": "color-text" - } - }, - { - "id": "mappings", - "value": [ - { - "options": { - "1": { - "color": "green", - "index": 0, - "text": "Primary" - }, - "2": { - "color": "yellow", - "index": 1, - "text": "Secondary" - } - }, - "type": "value" - } - ] - } - ] } ] }, @@ -174,6 +156,11 @@ "y": 1 }, "id": 2, + "options": { + "footer": { + "enablePagination": true + } + }, "pluginVersion": "v11.0.0", "targets": [ { @@ -181,7 +168,7 @@ "type": "prometheus", "uid": "${datasource}" }, - "expr": "mongodb_network_bytesIn{job=\"integrations/mongodb-atlas\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\", rs_nm=~\"$rs_nm\"}", + "expr": 
"mongodb_network_bytesIn{job=\"integrations/mongodb-atlas\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\", rs_nm=~\"$rs_nm\", cl_role=\"shardsvr\"}", "format": "table", "instant": true, "legendFormat": "", @@ -190,15 +177,6 @@ ], "title": "Shard nodes", "transformations": [ - { - "id": "reduce", - "options": { - "labelsToFields": true, - "reducers": [ - "lastNotNull" - ] - } - }, { "id": "organize", "options": { @@ -235,21 +213,18 @@ } }, { - "id": "filterByValue", + "id": "filterFieldsByName", "options": { - "filters": [ - { - "config": { - "id": "equal", - "options": { - "value": "shardsvr" - } - }, - "fieldName": "Role" - } - ], - "match": "all", - "type": "include" + "include": { + "names": [ + "Group", + "Cluster", + "Role", + "Node", + "Replica set", + "State" + ] + } } } ], @@ -270,36 +245,16 @@ }, { "datasource": { - "type": "datasource", - "uid": "-- Mixed --" + "type": "prometheus", + "uid": "${datasource}" }, "description": "An inventory table for config nodes in the environment.", "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [ - { - "options": { - "1": { - "index": 0, - "text": "Primary" - }, - "2": { - "index": 1, - "text": "Secondary" - } - }, - "type": "value" - } - ] - }, "overrides": [ { "matcher": { "id": "byName", - "options": "cl_role" + "options": "Role" }, "properties": [ { @@ -311,19 +266,45 @@ { "matcher": { "id": "byName", - "options": "rs_state" + "options": "State" }, "properties": [ { "id": "custom.width", "value": 100 + }, + { + "id": "custom.cellOptions", + "value": { + "type": "color-text" + } + }, + { + "id": "mappings", + "value": [ + { + "options": { + "1": { + "color": "green", + "index": 0, + "text": "Primary" + }, + "2": { + "color": "yellow", + "index": 1, + "text": "Secondary" + } + }, + "type": "value" + } + ] } ] }, { "matcher": { "id": "byName", - "options": "rs_nm" + "options": "Replica set" }, "properties": [ { @@ -335,7 +316,7 @@ { "matcher": { "id": "byName", - 
"options": "cl_name" + "options": "Cluster" }, "properties": [ { @@ -347,7 +328,7 @@ { "matcher": { "id": "byName", - "options": "group_id" + "options": "Group" }, "properties": [ { @@ -355,40 +336,6 @@ "value": 300 } ] - }, - { - "matcher": { - "id": "byName", - "options": "State" - }, - "properties": [ - { - "id": "custom.cellOptions", - "value": { - "type": "color-text" - } - }, - { - "id": "mappings", - "value": [ - { - "options": { - "1": { - "color": "green", - "index": 0, - "text": "Primary" - }, - "2": { - "color": "yellow", - "index": 1, - "text": "Secondary" - } - }, - "type": "value" - } - ] - } - ] } ] }, @@ -399,6 +346,11 @@ "y": 10 }, "id": 4, + "options": { + "footer": { + "enablePagination": true + } + }, "pluginVersion": "v11.0.0", "targets": [ { @@ -406,7 +358,7 @@ "type": "prometheus", "uid": "${datasource}" }, - "expr": "mongodb_network_bytesIn{job=\"integrations/mongodb-atlas\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\", rs_nm=~\"$rs_nm\"}", + "expr": "mongodb_network_bytesIn{job=\"integrations/mongodb-atlas\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\", rs_nm=~\"$rs_nm\", cl_role=\"configsvr\"}", "format": "table", "instant": true, "legendFormat": "", @@ -415,15 +367,6 @@ ], "title": "Config nodes", "transformations": [ - { - "id": "reduce", - "options": { - "labelsToFields": true, - "reducers": [ - "lastNotNull" - ] - } - }, { "id": "organize", "options": { @@ -460,21 +403,18 @@ } }, { - "id": "filterByValue", + "id": "filterFieldsByName", "options": { - "filters": [ - { - "config": { - "id": "equal", - "options": { - "value": "configsvr" - } - }, - "fieldName": "Role" - } - ], - "match": "all", - "type": "include" + "include": { + "names": [ + "Group", + "Cluster", + "Role", + "Node", + "Replica set", + "State" + ] + } } } ], @@ -495,36 +435,16 @@ }, { "datasource": { - "type": "datasource", - "uid": "-- Mixed --" + "type": "prometheus", + "uid": "${datasource}" }, "description": "An inventory table for 
mongos nodes in the environment.", "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [ - { - "options": { - "1": { - "index": 0, - "text": "Primary" - }, - "2": { - "index": 1, - "text": "Secondary" - } - }, - "type": "value" - } - ] - }, "overrides": [ { "matcher": { "id": "byName", - "options": "cl_role" + "options": "Role" }, "properties": [ { @@ -536,19 +456,7 @@ { "matcher": { "id": "byName", - "options": "rs_state" - }, - "properties": [ - { - "id": "custom.width", - "value": 100 - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "rs_nm" + "options": "Replica set" }, "properties": [ { @@ -560,7 +468,7 @@ { "matcher": { "id": "byName", - "options": "cl_name" + "options": "Cluster" }, "properties": [ { @@ -572,7 +480,7 @@ { "matcher": { "id": "byName", - "options": "group_id" + "options": "Group" }, "properties": [ { @@ -590,6 +498,11 @@ "y": 19 }, "id": 6, + "options": { + "footer": { + "enablePagination": true + } + }, "pluginVersion": "v11.0.0", "targets": [ { @@ -597,7 +510,7 @@ "type": "prometheus", "uid": "${datasource}" }, - "expr": "mongodb_network_bytesIn{job=\"integrations/mongodb-atlas\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\", rs_nm=~\"$rs_nm\"}", + "expr": "mongodb_network_bytesIn{job=\"integrations/mongodb-atlas\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\", rs_nm=~\"$rs_nm\", cl_role=\"mongos\"}", "format": "table", "instant": true, "legendFormat": "", @@ -606,15 +519,6 @@ ], "title": "mongos nodes", "transformations": [ - { - "id": "reduce", - "options": { - "labelsToFields": true, - "reducers": [ - "lastNotNull" - ] - } - }, { "id": "organize", "options": { @@ -651,21 +555,18 @@ } }, { - "id": "filterByValue", + "id": "filterFieldsByName", "options": { - "filters": [ - { - "config": { - "id": "equal", - "options": { - "value": "mongos" - } - }, - "fieldName": "Role" - } - ], - "match": "all", - "type": "include" + "include": { + "names": [ + "Group", + 
"Cluster", + "Role", + "Node", + "Replica set", + "State" + ] + } } } ], @@ -697,10 +598,7 @@ "gradientMode": "opacity", "lineInterpolation": "smooth", "lineWidth": 2, - "showPoints": "never", - "stacking": { - "mode": "normal" - } + "showPoints": "never" }, "unit": "iops" } @@ -714,8 +612,10 @@ "id": 8, "options": { "legend": { + "asTable": true, "calcs": [ ], - "displayMode": "list" + "displayMode": "list", + "placement": "right" }, "tooltip": { "mode": "multi", @@ -763,10 +663,7 @@ "gradientMode": "opacity", "lineInterpolation": "smooth", "lineWidth": 2, - "showPoints": "never", - "stacking": { - "mode": "normal" - } + "showPoints": "never" }, "unit": "ms" } @@ -780,8 +677,10 @@ "id": 9, "options": { "legend": { + "asTable": true, "calcs": [ ], - "displayMode": "list" + "displayMode": "list", + "placement": "right" }, "tooltip": { "mode": "multi", @@ -978,7 +877,7 @@ "type": "prometheus", "uid": "${datasource}" }, - "expr": "(sum without (disk_name) (hardware_disk_metrics_disk_space_used_bytes{job=\"integrations/mongodb-atlas\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\"})) / clamp_min((sum without (disk_name) (hardware_disk_metrics_disk_space_used_bytes{job=\"integrations/mongodb-atlas\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\"})) + (sum without (disk_name) (hardware_disk_metrics_disk_space_free_bytes{job=\"integrations/mongodb-atlas\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\"})), 1)", + "expr": "(sum (hardware_disk_metrics_disk_space_used_bytes{job=\"integrations/mongodb-atlas\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\"}) by (cl_name)) / clamp_min((sum (hardware_disk_metrics_disk_space_used_bytes{job=\"integrations/mongodb-atlas\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\"}) by (cl_name)) + (sum (hardware_disk_metrics_disk_space_free_bytes{job=\"integrations/mongodb-atlas\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\"}) by (cl_name)), 1)", "format": 
"time_series", "instant": false, "legendFormat": "{{cl_name}}", @@ -1053,11 +952,10 @@ "gradientMode": "opacity", "lineInterpolation": "smooth", "lineWidth": 2, - "showPoints": "never", - "stacking": { - "mode": "normal" - } + "showPoints": "never" }, + "decimals": 1, + "noValue": "No traffic", "unit": "Bps" } }, @@ -1070,8 +968,10 @@ "id": 14, "options": { "legend": { + "asTable": true, "calcs": [ ], - "displayMode": "list" + "displayMode": "list", + "placement": "right" }, "tooltip": { "mode": "multi", @@ -1121,6 +1021,8 @@ "lineWidth": 2, "showPoints": "never" }, + "decimals": 1, + "noValue": "No traffic", "unit": "reqps" } }, @@ -1159,7 +1061,7 @@ "type": "prometheus", "uid": "${datasource}" }, - "expr": "rate(mongodb_network_numSlowSSLOperations{job=\"integrations/mongodb-atlas\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\", rs_nm=~\"$rs_nm\"}[$__rate_interval])", + "expr": "sum(rate(mongodb_network_numSlowSSLOperations{job=\"integrations/mongodb-atlas\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\", rs_nm=~\"$rs_nm\"}[$__rate_interval])) by (cl_name)", "format": "time_series", "instant": false, "legendFormat": "{{cl_name}} - SSL", @@ -1197,7 +1099,9 @@ "lineWidth": 2, "showPoints": "never" }, - "unit": "conns/s" + "decimals": 1, + "noValue": "No packets", + "unit": "conns" } }, "gridPos": { @@ -1224,9 +1128,10 @@ "type": "prometheus", "uid": "${datasource}" }, - "expr": "sum(rate(mongodb_connections_totalCreated{job=\"integrations/mongodb-atlas\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\"}[$__rate_interval])) by (cl_name)", + "expr": "sum(increase(mongodb_connections_totalCreated{job=\"integrations/mongodb-atlas\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\"}[$__interval:] offset -$__interval)) by (cl_name)", "format": "time_series", "instant": false, + "interval": "2m", "legendFormat": "{{cl_name}}", "refId": "Connections created" } @@ -1243,17 +1148,30 @@ "fieldConfig": { "defaults": { "custom": { 
- "fillOpacity": 30, + "axisCenteredZero": true, + "axisLabel": "write(-) | read(+)", + "fillOpacity": 1, "gradientMode": "opacity", "lineInterpolation": "smooth", "lineWidth": 2, - "showPoints": "never", - "stacking": { - "mode": "normal" - } + "showPoints": "never" }, "unit": "ops" - } + }, + "overrides": [ + { + "matcher": { + "id": "byRegexp", + "options": "/write|written/" + }, + "properties": [ + { + "id": "custom.transform", + "value": "negative-Y" + } + ] + } + ] }, "gridPos": { "h": 8, @@ -1264,8 +1182,10 @@ "id": 18, "options": { "legend": { + "asTable": true, "calcs": [ ], - "displayMode": "list" + "displayMode": "list", + "placement": "right" }, "tooltip": { "mode": "multi", @@ -1315,8 +1235,9 @@ "id": 19, "options": { "legend": { + "asTable": true, "displayMode": "table", - "placement": "bottom", + "placement": "right", "values": [ "value" ] @@ -1394,7 +1315,9 @@ "fieldConfig": { "defaults": { "custom": { - "fillOpacity": 30, + "axisCenteredZero": true, + "axisLabel": "write(-) | read(+)", + "fillOpacity": 1, "gradientMode": "opacity", "lineInterpolation": "smooth", "lineWidth": 2, @@ -1404,7 +1327,21 @@ } }, "unit": "µs" - } + }, + "overrides": [ + { + "matcher": { + "id": "byRegexp", + "options": "/write|written/" + }, + "properties": [ + { + "id": "custom.transform", + "value": "negative-Y" + } + ] + } + ] }, "gridPos": { "h": 8, @@ -1609,10 +1546,7 @@ "gradientMode": "opacity", "lineInterpolation": "smooth", "lineWidth": 2, - "showPoints": "never", - "stacking": { - "mode": "normal" - } + "showPoints": "never" } } }, @@ -1625,6 +1559,7 @@ "id": 24, "options": { "legend": { + "asTable": true, "calcs": [ ], "displayMode": "list", "placement": "right" @@ -1832,7 +1767,7 @@ "uid": "${datasource}" }, "includeAll": true, - "label": "Instance", + "label": "Node", "multi": true, "name": "instance", "query": "label_values(mongodb_network_bytesIn{job=\"integrations/mongodb-atlas\",job=~\"$job\",cl_name=~\"$cl_name\"}, instance)", diff --git 
a/mongodb-atlas-mixin/dashboards_out/mongodb-atlas-elections-overview.json b/mongodb-atlas-mixin/dashboards_out/mongodb-atlas-elections-overview.json index 6430f84ca..af9c48dd4 100644 --- a/mongodb-atlas-mixin/dashboards_out/mongodb-atlas-elections-overview.json +++ b/mongodb-atlas-mixin/dashboards_out/mongodb-atlas-elections-overview.json @@ -16,6 +16,16 @@ "type": "link", "url": "/d/mongodb-atlas-operations-overview" }, + { + "asDropdown": true, + "includeVars": true, + "keepTime": true, + "tags": [ + "mongodb-atlas-mixin" + ], + "title": "All dashboards", + "type": "dashboards" + }, { "keepTime": true, "title": "MongoDB Atlas performance overview", @@ -631,7 +641,7 @@ "type": "prometheus", "uid": "${datasource}" }, - "expr": "mongodb_electionMetrics_averageCatchUpOps{job=\"integrations/mongodb-atlas\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\", rs_nm=~\"$rs_nm\"}", + "expr": "sum by (job,cl_name,instance) (\n mongodb_electionMetrics_averageCatchUpOps{job=\"integrations/mongodb-atlas\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\", rs_nm=~\"$rs_nm\"}\n)", "format": "time_series", "instant": false, "legendFormat": "{{instance}}", @@ -693,7 +703,7 @@ "uid": "${datasource}" }, "includeAll": true, - "label": "Instance", + "label": "Node", "multi": true, "name": "instance", "query": "label_values(mongodb_network_bytesIn{job=\"integrations/mongodb-atlas\",job=~\"$job\",cl_name=~\"$cl_name\"}, instance)", diff --git a/mongodb-atlas-mixin/dashboards_out/mongodb-atlas-operations-overview.json b/mongodb-atlas-mixin/dashboards_out/mongodb-atlas-operations-overview.json index 88a801f31..89cc8aca3 100644 --- a/mongodb-atlas-mixin/dashboards_out/mongodb-atlas-operations-overview.json +++ b/mongodb-atlas-mixin/dashboards_out/mongodb-atlas-operations-overview.json @@ -16,6 +16,16 @@ "type": "link", "url": "/d/mongodb-atlas-elections-overview" }, + { + "asDropdown": true, + "includeVars": true, + "keepTime": true, + "tags": [ + "mongodb-atlas-mixin" 
+ ], + "title": "All dashboards", + "type": "dashboards" + }, { "keepTime": true, "title": "MongoDB Atlas performance overview", @@ -40,7 +50,7 @@ }, "id": 1, "panels": [ ], - "title": "Operation counters - instance", + "title": "Operations", "type": "row" }, { @@ -48,7 +58,7 @@ "type": "prometheus", "uid": "${datasource}" }, - "description": "The rate of insert operations the node has received.", + "description": "The number of query operations.", "fieldConfig": { "defaults": { "custom": { @@ -56,12 +66,9 @@ "gradientMode": "opacity", "lineInterpolation": "smooth", "lineWidth": 2, - "showPoints": "never", - "stacking": { - "mode": "normal" - } + "showPoints": "never" }, - "unit": "ops" + "unit": "none" } }, "gridPos": { @@ -88,14 +95,15 @@ "type": "prometheus", "uid": "${datasource}" }, - "expr": "rate(mongodb_opcounters_insert{job=\"integrations/mongodb-atlas\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\", rs_nm=~\"$rs_nm\"}[$__rate_interval])", + "expr": "rate(mongodb_opcounters_query{job=\"integrations/mongodb-atlas\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\", rs_nm=~\"$rs_nm\"}[$__rate_interval])", "format": "time_series", "instant": false, + "interval": "2m", "legendFormat": "{{instance}}", - "refId": "Insert operations by instance" + "refId": "Query operations" } ], - "title": "Insert operations", + "title": "Query operations", "type": "timeseries" }, { @@ -103,7 +111,7 @@ "type": "prometheus", "uid": "${datasource}" }, - "description": "The rate of query operations the node has received.", + "description": "The number of insert operations.", "fieldConfig": { "defaults": { "custom": { @@ -111,12 +119,9 @@ "gradientMode": "opacity", "lineInterpolation": "smooth", "lineWidth": 2, - "showPoints": "never", - "stacking": { - "mode": "normal" - } + "showPoints": "never" }, - "unit": "ops" + "unit": "none" } }, "gridPos": { @@ -143,14 +148,15 @@ "type": "prometheus", "uid": "${datasource}" }, - "expr": 
"rate(mongodb_opcounters_query{job=\"integrations/mongodb-atlas\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\", rs_nm=~\"$rs_nm\"}[$__rate_interval])", + "expr": "rate(mongodb_opcounters_insert{job=\"integrations/mongodb-atlas\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\", rs_nm=~\"$rs_nm\"}[$__rate_interval])", "format": "time_series", "instant": false, + "interval": "2m", "legendFormat": "{{instance}}", - "refId": "Query operations by instance" + "refId": "Insert operations" } ], - "title": "Query operations", + "title": "Insert operations", "type": "timeseries" }, { @@ -158,7 +164,7 @@ "type": "prometheus", "uid": "${datasource}" }, - "description": "The rate of update operations this node has received.", + "description": "The number of update operations.", "fieldConfig": { "defaults": { "custom": { @@ -166,12 +172,9 @@ "gradientMode": "opacity", "lineInterpolation": "smooth", "lineWidth": 2, - "showPoints": "never", - "stacking": { - "mode": "normal" - } + "showPoints": "never" }, - "unit": "ops" + "unit": "none" } }, "gridPos": { @@ -201,6 +204,7 @@ "expr": "rate(mongodb_opcounters_update{job=\"integrations/mongodb-atlas\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\", rs_nm=~\"$rs_nm\"}[$__rate_interval])", "format": "time_series", "instant": false, + "interval": "2m", "legendFormat": "{{instance}}", "refId": "Update operations" } @@ -213,7 +217,7 @@ "type": "prometheus", "uid": "${datasource}" }, - "description": "The rate of delete operations this node has received.", + "description": "The number of delete operations.", "fieldConfig": { "defaults": { "custom": { @@ -221,12 +225,9 @@ "gradientMode": "opacity", "lineInterpolation": "smooth", "lineWidth": 2, - "showPoints": "never", - "stacking": { - "mode": "normal" - } + "showPoints": "never" }, - "unit": "ops" + "unit": "none" } }, "gridPos": { @@ -256,8 +257,9 @@ "expr": 
"rate(mongodb_opcounters_delete{job=\"integrations/mongodb-atlas\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\", rs_nm=~\"$rs_nm\"}[$__rate_interval])", "format": "time_series", "instant": false, + "interval": "2m", "legendFormat": "{{instance}}", - "refId": "Delete operations by instance" + "refId": "Delete operations" } ], "title": "Delete operations", @@ -291,6 +293,8 @@ "lineWidth": 2, "showPoints": "never" }, + "decimals": 1, + "noValue": "No packets", "unit": "none" } }, @@ -406,10 +410,7 @@ "gradientMode": "opacity", "lineInterpolation": "smooth", "lineWidth": 2, - "showPoints": "never", - "stacking": { - "mode": "normal" - } + "showPoints": "never" }, "unit": "ops" } @@ -423,8 +424,10 @@ "id": 10, "options": { "legend": { + "asTable": true, "calcs": [ ], - "displayMode": "list" + "displayMode": "list", + "placement": "right" }, "tooltip": { "mode": "multi", @@ -472,10 +475,7 @@ "gradientMode": "opacity", "lineInterpolation": "smooth", "lineWidth": 2, - "showPoints": "never", - "stacking": { - "mode": "normal" - } + "showPoints": "never" }, "unit": "µs" } @@ -489,8 +489,10 @@ "id": 11, "options": { "legend": { + "asTable": true, "calcs": [ ], - "displayMode": "list" + "displayMode": "list", + "placement": "right" }, "tooltip": { "mode": "multi", @@ -553,10 +555,7 @@ "gradientMode": "opacity", "lineInterpolation": "smooth", "lineWidth": 2, - "showPoints": "never", - "stacking": { - "mode": "normal" - } + "showPoints": "never" }, "unit": "none" } @@ -1144,7 +1143,7 @@ "uid": "${datasource}" }, "includeAll": true, - "label": "Instance", + "label": "Node", "multi": true, "name": "instance", "query": "label_values(mongodb_network_bytesIn{job=\"integrations/mongodb-atlas\",job=~\"$job\",cl_name=~\"$cl_name\"}, instance)", diff --git a/mongodb-atlas-mixin/dashboards_out/mongodb-atlas-performance-overview.json b/mongodb-atlas-mixin/dashboards_out/mongodb-atlas-performance-overview.json index da714c395..c3d5baed9 100644 --- 
a/mongodb-atlas-mixin/dashboards_out/mongodb-atlas-performance-overview.json +++ b/mongodb-atlas-mixin/dashboards_out/mongodb-atlas-performance-overview.json @@ -22,6 +22,16 @@ "type": "link", "url": "/d/mongodb-atlas-operations-overview" }, + { + "asDropdown": true, + "includeVars": true, + "keepTime": true, + "tags": [ + "mongodb-atlas-mixin" + ], + "title": "All dashboards", + "type": "dashboards" + }, { "keepTime": true, "title": "MongoDB Atlas sharding overview", @@ -70,8 +80,10 @@ "id": 2, "options": { "legend": { + "asTable": true, "calcs": [ ], - "displayMode": "list" + "displayMode": "list", + "placement": "right" }, "tooltip": { "mode": "multi", @@ -181,7 +193,7 @@ "fieldConfig": { "defaults": { "custom": { - "fillOpacity": 30, + "fillOpacity": 1, "gradientMode": "opacity", "lineInterpolation": "smooth", "lineWidth": 2, @@ -199,8 +211,10 @@ "id": 5, "options": { "legend": { + "asTable": true, "calcs": [ ], - "displayMode": "list" + "displayMode": "list", + "placement": "right" }, "tooltip": { "mode": "multi", @@ -369,6 +383,8 @@ "lineWidth": 2, "showPoints": "never" }, + "decimals": 1, + "noValue": "No traffic", "unit": "reqps" } }, @@ -381,8 +397,10 @@ "id": 9, "options": { "legend": { + "asTable": true, "calcs": [ ], - "displayMode": "list" + "displayMode": "list", + "placement": "right" }, "tooltip": { "mode": "multi", @@ -430,10 +448,7 @@ "gradientMode": "opacity", "lineInterpolation": "smooth", "lineWidth": 2, - "showPoints": "never", - "stacking": { - "mode": "normal" - } + "showPoints": "never" }, "unit": "Bps" } @@ -447,8 +462,10 @@ "id": 10, "options": { "legend": { + "asTable": true, "calcs": [ ], - "displayMode": "list" + "displayMode": "list", + "placement": "right" }, "tooltip": { "mode": "multi", @@ -505,17 +522,30 @@ "fieldConfig": { "defaults": { "custom": { - "fillOpacity": 30, + "axisCenteredZero": true, + "axisLabel": "write(-) | read(+)", + "fillOpacity": 1, "gradientMode": "opacity", "lineInterpolation": "smooth", "lineWidth": 2, - 
"showPoints": "never", - "stacking": { - "mode": "normal" - } + "showPoints": "never" }, "unit": "iops" - } + }, + "overrides": [ + { + "matcher": { + "id": "byRegexp", + "options": "/write|written/" + }, + "properties": [ + { + "id": "custom.transform", + "value": "negative-Y" + } + ] + } + ] }, "gridPos": { "h": 8, @@ -526,8 +556,10 @@ "id": 12, "options": { "legend": { + "asTable": true, "calcs": [ ], - "displayMode": "list" + "displayMode": "list", + "placement": "right" }, "tooltip": { "mode": "multi", @@ -575,10 +607,7 @@ "gradientMode": "opacity", "lineInterpolation": "smooth", "lineWidth": 2, - "showPoints": "never", - "stacking": { - "mode": "normal" - } + "showPoints": "never" }, "unit": "ms" } @@ -592,8 +621,10 @@ "id": 13, "options": { "legend": { + "asTable": true, "calcs": [ ], - "displayMode": "list" + "displayMode": "list", + "placement": "right" }, "tooltip": { "mode": "multi", @@ -682,7 +713,7 @@ "uid": "${datasource}" }, "includeAll": true, - "label": "Instance", + "label": "Node", "multi": true, "name": "instance", "query": "label_values(mongodb_network_bytesIn{job=\"integrations/mongodb-atlas\",job=~\"$job\",cl_name=~\"$cl_name\"}, instance)", diff --git a/mongodb-atlas-mixin/dashboards_out/mongodb-atlas-sharding-overview.json b/mongodb-atlas-mixin/dashboards_out/mongodb-atlas-sharding-overview.json index eb83fae2e..714796090 100644 --- a/mongodb-atlas-mixin/dashboards_out/mongodb-atlas-sharding-overview.json +++ b/mongodb-atlas-mixin/dashboards_out/mongodb-atlas-sharding-overview.json @@ -22,6 +22,16 @@ "type": "link", "url": "/d/mongodb-atlas-operations-overview" }, + { + "asDropdown": true, + "includeVars": true, + "keepTime": true, + "tags": [ + "mongodb-atlas-mixin" + ], + "title": "All dashboards", + "type": "dashboards" + }, { "keepTime": true, "title": "MongoDB Atlas performance overview", @@ -1080,7 +1090,7 @@ "uid": "${datasource}" }, "includeAll": true, - "label": "Instance", + "label": "Node", "multi": true, "name": "instance", 
"query": "label_values(mongodb_network_bytesIn{job=\"integrations/mongodb-atlas\",job=~\"$job\",cl_name=~\"$cl_name\"}, instance)", diff --git a/mongodb-atlas-mixin/links.libsonnet b/mongodb-atlas-mixin/links.libsonnet index 529632852..aa7b6b926 100644 --- a/mongodb-atlas-mixin/links.libsonnet +++ b/mongodb-atlas-mixin/links.libsonnet @@ -4,28 +4,27 @@ local g = import './g.libsonnet'; local link = g.dashboard.link, new(this): { clusterOverview: - link.link.new('MongoDB Atlas cluster overview', '/d/' + this.config.uid + '-cluster-overview') + link.link.new('MongoDB Atlas cluster overview', '/d/' + this.grafana.dashboards['mongodb-atlas-cluster-overview.json'].uid) + link.link.options.withKeepTime(true), electionsOverview: - link.link.new('MongoDB Atlas elections overview', '/d/' + this.config.uid + '-elections-overview') + link.link.new('MongoDB Atlas elections overview', '/d/' + this.grafana.dashboards['mongodb-atlas-elections-overview.json'].uid) + link.link.options.withKeepTime(true), operationsOverview: - link.link.new('MongoDB Atlas operations overview', '/d/' + this.config.uid + '-operations-overview') + link.link.new('MongoDB Atlas operations overview', '/d/' + this.grafana.dashboards['mongodb-atlas-operations-overview.json'].uid) + link.link.options.withKeepTime(true), performanceOverview: - link.link.new('MongoDB Atlas performance overview', '/d/' + this.config.uid + '-performance-overview') + link.link.new('MongoDB Atlas performance overview', '/d/' + this.grafana.dashboards['mongodb-atlas-performance-overview.json'].uid) + link.link.options.withKeepTime(true), - } + if this.config.enableShardingOverview then { - shardingOverview: - link.link.new('MongoDB Atlas sharding overview', '/d/' + this.config.uid + '-sharding-overview') - + link.link.options.withKeepTime(true), - } else { - shardingOverview: {}, - } + { otherDashboards: link.dashboards.new('All dashboards', this.config.dashboardTags) + link.dashboards.options.withIncludeVars(true) + 
link.dashboards.options.withKeepTime(true) + link.dashboards.options.withAsDropdown(true), + } + if this.config.enableShardingOverview then { + shardingOverview: + link.link.new('MongoDB Atlas sharding overview', '/d/' + this.grafana.dashboards['mongodb-atlas-sharding-overview.json'].uid) + + link.link.options.withKeepTime(true), + } else { + shardingOverview: {}, }, } diff --git a/mongodb-atlas-mixin/mixin.libsonnet b/mongodb-atlas-mixin/mixin.libsonnet index d4ebe073e..b02db8205 100644 --- a/mongodb-atlas-mixin/mixin.libsonnet +++ b/mongodb-atlas-mixin/mixin.libsonnet @@ -15,6 +15,9 @@ local optional_labels = { cl_name+: { label: 'Atlas cluster', }, + instance+: { + label: 'Node', + }, }; // populate monitoring-mixin: diff --git a/mongodb-atlas-mixin/panels.libsonnet b/mongodb-atlas-mixin/panels.libsonnet index 82cd14c1c..e8ece4bd2 100644 --- a/mongodb-atlas-mixin/panels.libsonnet +++ b/mongodb-atlas-mixin/panels.libsonnet @@ -10,120 +10,155 @@ local commonlib = import 'common-lib/common/main.libsonnet'; // shardNodesTable: - g.panel.table.new('Shard nodes') - + g.panel.table.panelOptions.withDescription('An inventory table for shard nodes in the environment.') - + g.panel.table.queryOptions.withTargets([ - signals.cluster.shardNodeRepresentativeMetric.asTableTarget(), - ]) + commonlib.panels.generic.table.base.new( + 'Shard nodes', + targets=[ + signals.cluster.shardNodeRepresentativeMetric.asTableTarget(), + ], + description='An inventory table for shard nodes in the environment.' 
+ ) + g.panel.table.queryOptions.withTransformations([ - { id: 'reduce', options: { labelsToFields: true, reducers: ['lastNotNull'] } }, { id: 'organize', options: { excludeByName: { Field: true, 'Last *': true, __name__: true, job: true, org_id: true, process_port: true }, indexByName: { Field: 6, 'Last *': 11, __name__: 7, cl_name: 1, cl_role: 2, group_id: 0, instance: 3, job: 8, org_id: 9, process_port: 10, rs_nm: 4, rs_state: 5 }, renameByName: { cl_name: 'Cluster', cl_role: 'Role', group_id: 'Group', instance: 'Node', rs_nm: 'Replica set', rs_state: 'State' }, } }, - { id: 'filterByValue', options: { filters: [{ config: { id: 'equal', options: { value: 'shardsvr' } }, fieldName: 'Role' }], match: 'all', type: 'include' } }, - ]) - + g.panel.table.standardOptions.color.withMode('thresholds') - + g.panel.table.standardOptions.withMappings([ - g.panel.table.standardOptions.mapping.ValueMap.withType() - + g.panel.table.standardOptions.mapping.ValueMap.withOptions({ - '1': { index: 0, text: 'Primary' }, - '2': { index: 1, text: 'Secondary' }, - }), - ]) - + g.panel.table.standardOptions.withOverrides([ - g.panel.table.fieldOverride.byName.new('cl_role') + { id: 'filterFieldsByName', options: { + include: { + names: [ + 'Group', + 'Cluster', + 'Role', + 'Node', + 'Replica set', + 'State', + ], + }, + } }, + ]) + + g.panel.table.standardOptions.withOverridesMixin([ + g.panel.table.fieldOverride.byName.new('Role') + g.panel.table.fieldOverride.byName.withProperty('custom.width', 150), - g.panel.table.fieldOverride.byName.new('rs_state') - + g.panel.table.fieldOverride.byName.withProperty('custom.width', 100), - g.panel.table.fieldOverride.byName.new('rs_nm') - + g.panel.table.fieldOverride.byName.withProperty('custom.width', 250), - g.panel.table.fieldOverride.byName.new('cl_name') - + g.panel.table.fieldOverride.byName.withProperty('custom.width', 300), - g.panel.table.fieldOverride.byName.new('group_id') - + 
g.panel.table.fieldOverride.byName.withProperty('custom.width', 300), + ]) + + g.panel.table.standardOptions.withOverridesMixin([ g.panel.table.fieldOverride.byName.new('State') + + g.panel.table.fieldOverride.byName.withProperty('custom.width', 100) + g.panel.table.fieldOverride.byName.withProperty('custom.cellOptions', { type: 'color-text' }) + g.panel.table.fieldOverride.byName.withProperty('mappings', [ { options: { '1': { color: 'green', index: 0, text: 'Primary' }, '2': { color: 'yellow', index: 1, text: 'Secondary' } }, type: 'value' }, ]), - ]), + ]) + + g.panel.table.standardOptions.withOverridesMixin([ + g.panel.table.fieldOverride.byName.new('Replica set') + + g.panel.table.fieldOverride.byName.withProperty('custom.width', 250), + ]) + + g.panel.table.standardOptions.withOverridesMixin([ + g.panel.table.fieldOverride.byName.new('Cluster') + + g.panel.table.fieldOverride.byName.withProperty('custom.width', 300), + ]) + + g.panel.table.standardOptions.withOverridesMixin([ + g.panel.table.fieldOverride.byName.new('Group') + + g.panel.table.fieldOverride.byName.withProperty('custom.width', 300), + ]) + + g.panel.table.options.footer.withEnablePagination(true), configNodesTable: - g.panel.table.new('Config nodes') - + g.panel.table.panelOptions.withDescription('An inventory table for config nodes in the environment.') - + g.panel.table.queryOptions.withTargets([signals.cluster.configNodeRepresentativeMetric.asTableTarget()]) + commonlib.panels.generic.table.base.new( + 'Config nodes', + targets=[ + signals.cluster.configNodeRepresentativeMetric.asTableTarget(), + ], + description='An inventory table for config nodes in the environment.' 
+ ) + g.panel.table.queryOptions.withTransformations([ - { id: 'reduce', options: { labelsToFields: true, reducers: ['lastNotNull'] } }, { id: 'organize', options: { excludeByName: { Field: true, 'Last *': true, __name__: true, job: true, org_id: true, process_port: true }, indexByName: { Field: 6, 'Last *': 11, __name__: 7, cl_name: 1, cl_role: 2, group_id: 0, instance: 3, job: 8, org_id: 9, process_port: 10, rs_nm: 4, rs_state: 5 }, renameByName: { cl_name: 'Cluster', cl_role: 'Role', group_id: 'Group', instance: 'Node', rs_nm: 'Replica set', rs_state: 'State' }, } }, - { id: 'filterByValue', options: { filters: [{ config: { id: 'equal', options: { value: 'configsvr' } }, fieldName: 'Role' }], match: 'all', type: 'include' } }, - ]) - + g.panel.table.standardOptions.color.withMode('thresholds') - + g.panel.table.standardOptions.withMappings([ - g.panel.table.standardOptions.mapping.ValueMap.withType() - + g.panel.table.standardOptions.mapping.ValueMap.withOptions({ - '1': { index: 0, text: 'Primary' }, - '2': { index: 1, text: 'Secondary' }, - }), - ]) - + g.panel.table.standardOptions.withOverrides([ - g.panel.table.fieldOverride.byName.new('cl_role') + { id: 'filterFieldsByName', options: { + include: { + names: [ + 'Group', + 'Cluster', + 'Role', + 'Node', + 'Replica set', + 'State', + ], + }, + } }, + ]) + + g.panel.table.standardOptions.withOverridesMixin([ + g.panel.table.fieldOverride.byName.new('Role') + g.panel.table.fieldOverride.byName.withProperty('custom.width', 150), - g.panel.table.fieldOverride.byName.new('rs_state') - + g.panel.table.fieldOverride.byName.withProperty('custom.width', 100), - g.panel.table.fieldOverride.byName.new('rs_nm') - + g.panel.table.fieldOverride.byName.withProperty('custom.width', 250), - g.panel.table.fieldOverride.byName.new('cl_name') - + g.panel.table.fieldOverride.byName.withProperty('custom.width', 300), - g.panel.table.fieldOverride.byName.new('group_id') - + 
g.panel.table.fieldOverride.byName.withProperty('custom.width', 300), + ]) + + g.panel.table.standardOptions.withOverridesMixin([ g.panel.table.fieldOverride.byName.new('State') + + g.panel.table.fieldOverride.byName.withProperty('custom.width', 100) + g.panel.table.fieldOverride.byName.withProperty('custom.cellOptions', { type: 'color-text' }) + g.panel.table.fieldOverride.byName.withProperty('mappings', [ { options: { '1': { color: 'green', index: 0, text: 'Primary' }, '2': { color: 'yellow', index: 1, text: 'Secondary' } }, type: 'value' }, ]), - ]), + ]) + + g.panel.table.standardOptions.withOverridesMixin([ + g.panel.table.fieldOverride.byName.new('Replica set') + + g.panel.table.fieldOverride.byName.withProperty('custom.width', 250), + ]) + + g.panel.table.standardOptions.withOverridesMixin([ + g.panel.table.fieldOverride.byName.new('Cluster') + + g.panel.table.fieldOverride.byName.withProperty('custom.width', 300), + ]) + + g.panel.table.standardOptions.withOverridesMixin([ + g.panel.table.fieldOverride.byName.new('Group') + + g.panel.table.fieldOverride.byName.withProperty('custom.width', 300), + ]) + + g.panel.table.options.footer.withEnablePagination(true), mongosNodesTable: - g.panel.table.new('mongos nodes') - + g.panel.table.panelOptions.withDescription('An inventory table for mongos nodes in the environment.') - + g.panel.table.queryOptions.withTargets([ - signals.cluster.mongosNodeRepresentativeMetric.asTableTarget(), - ]) + commonlib.panels.generic.table.base.new( + 'mongos nodes', + targets=[ + signals.cluster.mongosNodeRepresentativeMetric.asTableTarget(), + ], + description='An inventory table for mongos nodes in the environment.' 
+ ) + g.panel.table.queryOptions.withTransformations([ - { id: 'reduce', options: { labelsToFields: true, reducers: ['lastNotNull'] } }, { id: 'organize', options: { excludeByName: { Field: true, 'Last *': true, __name__: true, job: true, org_id: true, process_port: true, rs_state: true }, indexByName: { Field: 6, 'Last *': 11, __name__: 7, cl_name: 1, cl_role: 2, group_id: 0, instance: 3, job: 8, org_id: 9, process_port: 10, rs_nm: 4, rs_state: 5 }, renameByName: { cl_name: 'Cluster', cl_role: 'Role', group_id: 'Group', instance: 'Node', rs_nm: 'Replica set' }, } }, - { id: 'filterByValue', options: { filters: [{ config: { id: 'equal', options: { value: 'mongos' } }, fieldName: 'Role' }], match: 'all', type: 'include' } }, - ]) - + g.panel.table.standardOptions.color.withMode('thresholds') - + g.panel.table.standardOptions.withMappings([ - g.panel.table.standardOptions.mapping.ValueMap.withType() - + g.panel.table.standardOptions.mapping.ValueMap.withOptions({ - '1': { index: 0, text: 'Primary' }, - '2': { index: 1, text: 'Secondary' }, - }), - ]) - + g.panel.table.standardOptions.withOverrides([ - g.panel.table.fieldOverride.byName.new('cl_role') + { id: 'filterFieldsByName', options: { + include: { + names: [ + 'Group', + 'Cluster', + 'Role', + 'Node', + 'Replica set', + 'State', + ], + }, + } }, + ]) + + g.panel.table.standardOptions.withOverridesMixin([ + g.panel.table.fieldOverride.byName.new('Role') + g.panel.table.fieldOverride.byName.withProperty('custom.width', 150), - g.panel.table.fieldOverride.byName.new('rs_state') - + g.panel.table.fieldOverride.byName.withProperty('custom.width', 100), - g.panel.table.fieldOverride.byName.new('rs_nm') + ]) + + g.panel.table.standardOptions.withOverridesMixin([ + g.panel.table.fieldOverride.byName.new('Replica set') + g.panel.table.fieldOverride.byName.withProperty('custom.width', 250), - g.panel.table.fieldOverride.byName.new('cl_name') + ]) + + g.panel.table.standardOptions.withOverridesMixin([ + 
g.panel.table.fieldOverride.byName.new('Cluster') + g.panel.table.fieldOverride.byName.withProperty('custom.width', 300), - g.panel.table.fieldOverride.byName.new('group_id') + ]) + + g.panel.table.standardOptions.withOverridesMixin([ + g.panel.table.fieldOverride.byName.new('Group') + g.panel.table.fieldOverride.byName.withProperty('custom.width', 300), - ]), + ]) + + g.panel.table.options.footer.withEnablePagination(true), // // Performance section panels @@ -136,9 +171,8 @@ local commonlib = import 'common-lib/common/main.libsonnet'; ]) + g.panel.timeSeries.panelOptions.withDescription("The number of read and write I/O's processed.") + g.panel.timeSeries.standardOptions.withUnit('iops') - + g.panel.timeSeries.options.tooltip.withMode('multi') - + g.panel.timeSeries.options.tooltip.withSort('desc') - + g.panel.timeSeries.fieldConfig.defaults.custom.stacking.withMode('normal'), + + g.panel.timeSeries.options.legend.withPlacement('right') + + g.panel.timeSeries.options.legend.withAsTable(true), hardwareIOWaitTime: commonlib.panels.generic.timeSeries.base.new('Hardware I/O wait time / $__interval', targets=[ @@ -149,10 +183,9 @@ local commonlib = import 'common-lib/common/main.libsonnet'; ]) + g.panel.timeSeries.panelOptions.withDescription('The amount of time spent waiting for I/O requests.') + g.panel.timeSeries.standardOptions.withUnit('ms') - + g.panel.timeSeries.options.tooltip.withMode('multi') + g.panel.timeSeries.options.tooltip.withSort('desc') - - + g.panel.timeSeries.fieldConfig.defaults.custom.stacking.withMode('normal'), + + g.panel.timeSeries.options.legend.withPlacement('right') + + g.panel.timeSeries.options.legend.withAsTable(true), hardwareCPUInterruptServiceTime: commonlib.panels.generic.timeSeries.base.new('Hardware CPU interrupt service time / $__interval', targets=[ @@ -161,7 +194,6 @@ local commonlib = import 'common-lib/common/main.libsonnet'; ]) + g.panel.timeSeries.panelOptions.withDescription('The amount of time spent servicing CPU 
interrupts.') + g.panel.timeSeries.standardOptions.withUnit('ms') - + g.panel.timeSeries.options.tooltip.withMode('multi') + g.panel.timeSeries.options.tooltip.withSort('desc'), memoryUsed: @@ -171,7 +203,6 @@ local commonlib = import 'common-lib/common/main.libsonnet'; ]) + g.panel.timeSeries.panelOptions.withDescription('The amount of RAM and virtual memory being used.') + g.panel.timeSeries.standardOptions.withUnit('mbytes') - + g.panel.timeSeries.options.tooltip.withMode('multi') + g.panel.timeSeries.options.tooltip.withSort('desc'), diskSpaceUsage: @@ -182,7 +213,6 @@ local commonlib = import 'common-lib/common/main.libsonnet'; + g.panel.timeSeries.standardOptions.withUnit('percentunit') + g.panel.timeSeries.standardOptions.withMin(0) + g.panel.timeSeries.standardOptions.withMax(1) - + g.panel.timeSeries.options.tooltip.withMode('multi') + g.panel.timeSeries.options.tooltip.withSort('desc'), networkRequests: @@ -191,55 +221,47 @@ local commonlib = import 'common-lib/common/main.libsonnet'; ]) + g.panel.timeSeries.panelOptions.withDescription('The number of distinct requests that the server has received.') + g.panel.timeSeries.standardOptions.withUnit('reqps') - + g.panel.timeSeries.options.tooltip.withMode('multi') + g.panel.timeSeries.options.tooltip.withSort('desc'), networkThroughput: - commonlib.panels.generic.timeSeries.base.new('Network throughput', targets=[ + commonlib.panels.network.timeSeries.traffic.new('Network throughput', targets=[ signals.cluster.networkBytesIn.asTarget(), signals.cluster.networkBytesOut.asTarget(), ]) + g.panel.timeSeries.panelOptions.withDescription('The number of bytes sent and received over network connections.') + g.panel.timeSeries.standardOptions.withUnit('Bps') - + g.panel.timeSeries.options.tooltip.withMode('multi') + g.panel.timeSeries.options.tooltip.withSort('desc') - - + g.panel.timeSeries.fieldConfig.defaults.custom.stacking.withMode('normal'), + + g.panel.timeSeries.options.legend.withPlacement('right') + + 
g.panel.timeSeries.options.legend.withAsTable(true), slowRequests: - commonlib.panels.generic.timeSeries.base.new('Slow requests', targets=[ + commonlib.panels.network.timeSeries.traffic.new('Slow requests', targets=[ signals.cluster.networkSlowDNS.asTarget(), signals.cluster.networkSlowSSL.asTarget(), ]) + g.panel.timeSeries.panelOptions.withDescription('The rate of DNS and SSL operations that took longer than 1 second.') + g.panel.timeSeries.standardOptions.withUnit('reqps') - + g.panel.timeSeries.options.tooltip.withMode('multi') + g.panel.timeSeries.options.tooltip.withSort('desc'), - // - // Operations section panels - // - connections: - commonlib.panels.generic.timeSeries.base.new('Connections', targets=[ - signals.cluster.connectionsCreated.asTarget(), + commonlib.panels.network.timeSeries.base.new('Connections', targets=[ + signals.cluster.connectionsCreated.asTarget() + + g.query.prometheus.withInterval('2m'), ]) + g.panel.timeSeries.panelOptions.withDescription('The rate of incoming connections to the cluster created.') - + g.panel.timeSeries.standardOptions.withUnit('conns/s') - + g.panel.timeSeries.options.tooltip.withMode('multi') + + g.panel.timeSeries.standardOptions.withUnit('conns') + g.panel.timeSeries.options.tooltip.withSort('desc'), readwriteOperations: - commonlib.panels.generic.timeSeries.base.new('Read/Write operations', targets=[ + commonlib.panels.disk.timeSeries.iops.new('Read/Write operations', targets=[ signals.cluster.opLatenciesReadsOps.asTarget(), signals.cluster.opLatenciesWritesOps.asTarget(), ]) + g.panel.timeSeries.panelOptions.withDescription('The number of read and write operations.') + g.panel.timeSeries.standardOptions.withUnit('ops') - + g.panel.timeSeries.options.tooltip.withMode('multi') + g.panel.timeSeries.options.tooltip.withSort('desc') - - + g.panel.timeSeries.fieldConfig.defaults.custom.stacking.withMode('normal'), + + g.panel.timeSeries.options.legend.withPlacement('right') + + 
g.panel.timeSeries.options.legend.withAsTable(true), operations: g.panel.pieChart.new('Operations') @@ -259,10 +281,12 @@ local commonlib = import 'common-lib/common/main.libsonnet'; + g.panel.pieChart.options.legend.withPlacement('bottom') + g.panel.pieChart.options.legend.withValues(['value']) + g.panel.pieChart.options.tooltip.withMode('multi') - + g.panel.pieChart.options.tooltip.withSort('desc'), + + g.panel.pieChart.options.tooltip.withSort('desc') + + g.panel.pieChart.options.legend.withAsTable(true) + + g.panel.pieChart.options.legend.withPlacement('right'), readwriteLatency: - commonlib.panels.generic.timeSeries.base.new('Read/Write latency / $__interval', targets=[ + commonlib.panels.disk.timeSeries.ioWaitTime.new('Read/Write latency / $__interval', targets=[ signals.cluster.opLatenciesReadsLatency.asTarget() + g.query.prometheus.withInterval('2m'), signals.cluster.opLatenciesWritesLatency.asTarget() @@ -270,9 +294,7 @@ local commonlib = import 'common-lib/common/main.libsonnet'; ]) + g.panel.timeSeries.panelOptions.withDescription('The latency for read and write operations.') + g.panel.timeSeries.standardOptions.withUnit('µs') - + g.panel.timeSeries.options.tooltip.withMode('multi') + g.panel.timeSeries.options.tooltip.withSort('desc') - + g.panel.timeSeries.fieldConfig.defaults.custom.stacking.withMode('normal'), // @@ -285,9 +307,7 @@ local commonlib = import 'common-lib/common/main.libsonnet'; signals.cluster.globalLockQueueWriters.asTarget(), ]) + g.panel.timeSeries.panelOptions.withDescription('The number of reads and writes queued because of a lock.') - + g.panel.timeSeries.options.tooltip.withMode('multi') + g.panel.timeSeries.options.tooltip.withSort('desc') - + g.panel.timeSeries.fieldConfig.defaults.custom.stacking.withMode('normal'), activeClientOperations: @@ -296,11 +316,10 @@ local commonlib = import 'common-lib/common/main.libsonnet'; signals.cluster.globalLockActiveWriters.asTarget(), ]) + 
g.panel.timeSeries.panelOptions.withDescription('The number of reads and writes being actively performed by connected clients.') - + g.panel.timeSeries.options.tooltip.withMode('multi') + g.panel.timeSeries.options.tooltip.withSort('desc') - + g.panel.timeSeries.fieldConfig.defaults.custom.stacking.withMode('normal'), + // only available in MongoDB 4.4+ databaseDeadlocks: commonlib.panels.generic.timeSeries.base.new('Database deadlocks / $__interval', targets=[ signals.cluster.dbDeadlockExclusive.asTarget() @@ -314,11 +333,9 @@ local commonlib = import 'common-lib/common/main.libsonnet'; ]) + g.panel.timeSeries.panelOptions.withDescription('The number of deadlocks for database level locks.') + g.panel.timeSeries.options.legend.withPlacement('right') - + g.panel.timeSeries.options.tooltip.withMode('multi') - + g.panel.timeSeries.options.tooltip.withSort('desc') - - + g.panel.timeSeries.fieldConfig.defaults.custom.stacking.withMode('normal'), + + g.panel.timeSeries.options.legend.withAsTable(true), + // only available in MongoDB 4.4+ databaseWaitsAcquiringLock: commonlib.panels.generic.timeSeries.base.new('Database waits acquiring lock / $__interval', targets=[ signals.cluster.dbWaitCountExclusive.asTarget() @@ -332,13 +349,11 @@ local commonlib = import 'common-lib/common/main.libsonnet'; ]) + g.panel.timeSeries.panelOptions.withDescription('The number of times lock acquisitions encounter waits for database level locks.') + g.panel.timeSeries.options.legend.withPlacement('right') - + g.panel.timeSeries.options.tooltip.withMode('multi') + g.panel.timeSeries.options.tooltip.withSort('desc') - + g.panel.timeSeries.fieldConfig.defaults.custom.stacking.withMode('normal'), // - // Elections section panels + // Elections panels // stepUpElectionsCalled: @@ -352,7 +367,6 @@ local commonlib = import 'common-lib/common/main.libsonnet'; + g.panel.timeSeries.options.legend.withPlacement('right') + g.panel.timeSeries.options.legend.withAsTable(true) + 
g.panel.timeSeries.standardOptions.withUnit('none') - + g.panel.timeSeries.options.tooltip.withMode('multi') + g.panel.timeSeries.options.tooltip.withSort('desc'), priorityElections: @@ -366,7 +380,6 @@ local commonlib = import 'common-lib/common/main.libsonnet'; + g.panel.timeSeries.options.legend.withAsTable(true) + g.panel.timeSeries.panelOptions.withDescription('The number of elections called and elections won by the node when it had a higher priority than the primary node.') + g.panel.timeSeries.standardOptions.withUnit('none') - + g.panel.timeSeries.options.tooltip.withMode('multi') + g.panel.timeSeries.options.tooltip.withSort('desc'), takeoverElections: @@ -380,7 +393,6 @@ local commonlib = import 'common-lib/common/main.libsonnet'; + g.panel.timeSeries.options.legend.withAsTable(true) + g.panel.timeSeries.panelOptions.withDescription('The number of elections called and elections won by the node when it was more current than the primary node.') + g.panel.timeSeries.standardOptions.withUnit('none') - + g.panel.timeSeries.options.tooltip.withMode('multi') + g.panel.timeSeries.options.tooltip.withSort('desc'), timeoutElections: @@ -394,7 +406,6 @@ local commonlib = import 'common-lib/common/main.libsonnet'; + g.panel.timeSeries.options.legend.withAsTable(true) + g.panel.timeSeries.panelOptions.withDescription('The number of elections called and elections won by the node when the time it took to reach the primary node exceeded the election timeout limit.') + g.panel.timeSeries.standardOptions.withUnit('none') - + g.panel.timeSeries.options.tooltip.withMode('multi') + g.panel.timeSeries.options.tooltip.withSort('desc'), catchUpsTotal: @@ -404,7 +415,6 @@ local commonlib = import 'common-lib/common/main.libsonnet'; ]) + g.panel.timeSeries.panelOptions.withDescription('The number of times the node had to catch up to the highest known oplog entry.') + g.panel.timeSeries.standardOptions.withUnit('none') - + g.panel.timeSeries.options.tooltip.withMode('multi') + 
g.panel.timeSeries.options.tooltip.withSort('desc'), catchUpsSkipped: @@ -414,7 +424,6 @@ local commonlib = import 'common-lib/common/main.libsonnet'; ]) + g.panel.timeSeries.panelOptions.withDescription('The number of times the node skipped the catch up process when it was the newly elected primary.') + g.panel.timeSeries.standardOptions.withUnit('none') - + g.panel.timeSeries.options.tooltip.withMode('multi') + g.panel.timeSeries.options.tooltip.withSort('desc'), catchUpsSucceeded: @@ -424,7 +433,6 @@ local commonlib = import 'common-lib/common/main.libsonnet'; ]) + g.panel.timeSeries.panelOptions.withDescription('The number of times the node succeeded in catching up when it was the newly elected primary.') + g.panel.timeSeries.standardOptions.withUnit('none') - + g.panel.timeSeries.options.tooltip.withMode('multi') + g.panel.timeSeries.options.tooltip.withSort('desc'), catchUpsFailed: @@ -434,7 +442,6 @@ local commonlib = import 'common-lib/common/main.libsonnet'; ]) + g.panel.timeSeries.panelOptions.withDescription('The number of times the node failed in catching up when it was the newly elected primary.') + g.panel.timeSeries.standardOptions.withUnit('none') - + g.panel.timeSeries.options.tooltip.withMode('multi') + g.panel.timeSeries.options.tooltip.withSort('desc'), catchUpsTimedOut: @@ -451,14 +458,12 @@ local commonlib = import 'common-lib/common/main.libsonnet'; ]) + g.panel.timeSeries.panelOptions.withDescription('The average number of operations done during the catch-up process when this node is the newly elected primary.') + g.panel.timeSeries.standardOptions.withUnit('none') - + g.panel.timeSeries.options.tooltip.withMode('multi') + g.panel.timeSeries.options.tooltip.withSort('desc'), // // Operations Overview dashboard panels // - // Section 1: Operation Counters (by type) - cluster-level aggregated insertOperations: commonlib.panels.generic.timeSeries.base.new('Insert operations', targets=[ signals.operations.opCountersInsert.asTarget() @@ -466,7 
+471,6 @@ local commonlib = import 'common-lib/common/main.libsonnet'; ]) + g.panel.timeSeries.panelOptions.withDescription('The number of insert operations.') + g.panel.timeSeries.standardOptions.withUnit('none') - + g.panel.timeSeries.options.tooltip.withMode('multi') + g.panel.timeSeries.options.tooltip.withSort('desc'), queryOperations: @@ -476,7 +480,6 @@ local commonlib = import 'common-lib/common/main.libsonnet'; ]) + g.panel.timeSeries.panelOptions.withDescription('The number of query operations.') + g.panel.timeSeries.standardOptions.withUnit('none') - + g.panel.timeSeries.options.tooltip.withMode('multi') + g.panel.timeSeries.options.tooltip.withSort('desc'), updateOperations: @@ -486,7 +489,6 @@ local commonlib = import 'common-lib/common/main.libsonnet'; ]) + g.panel.timeSeries.panelOptions.withDescription('The number of update operations.') + g.panel.timeSeries.standardOptions.withUnit('none') - + g.panel.timeSeries.options.tooltip.withMode('multi') + g.panel.timeSeries.options.tooltip.withSort('desc'), deleteOperations: @@ -496,189 +498,15 @@ local commonlib = import 'common-lib/common/main.libsonnet'; ]) + g.panel.timeSeries.panelOptions.withDescription('The number of delete operations.') + g.panel.timeSeries.standardOptions.withUnit('none') - + g.panel.timeSeries.options.tooltip.withMode('multi') - + g.panel.timeSeries.options.tooltip.withSort('desc'), - - // Section 2: Operation Counters (by instance) - insertOperationsByInstance: - commonlib.panels.generic.timeSeries.base.new('Insert operations', targets=[ - signals.operations.opCountersInsertByInstance.asTarget(), - ]) - + g.panel.timeSeries.panelOptions.withDescription('The rate of insert operations the node has received.') - + g.panel.timeSeries.standardOptions.withUnit('ops') - + g.panel.timeSeries.options.tooltip.withMode('multi') - + g.panel.timeSeries.options.tooltip.withSort('desc') - - + g.panel.timeSeries.fieldConfig.defaults.custom.stacking.withMode('normal'), - - 
queryOperationsByInstance: - commonlib.panels.generic.timeSeries.base.new('Query operations', targets=[ - signals.operations.opCountersQueryByInstance.asTarget(), - ]) - + g.panel.timeSeries.panelOptions.withDescription('The rate of query operations the node has received.') - + g.panel.timeSeries.standardOptions.withUnit('ops') - + g.panel.timeSeries.options.tooltip.withMode('multi') - + g.panel.timeSeries.options.tooltip.withSort('desc') - - + g.panel.timeSeries.fieldConfig.defaults.custom.stacking.withMode('normal'), - - updateOperationsByInstance: - commonlib.panels.generic.timeSeries.base.new('Update operations', targets=[ - signals.operations.opCountersUpdate.asTarget(), - ]) - + g.panel.timeSeries.panelOptions.withDescription('The rate of update operations this node has received.') - + g.panel.timeSeries.standardOptions.withUnit('ops') - + g.panel.timeSeries.options.tooltip.withMode('multi') - + g.panel.timeSeries.options.tooltip.withSort('desc') - - + g.panel.timeSeries.fieldConfig.defaults.custom.stacking.withMode('normal'), - - deleteOperationsByInstance: - commonlib.panels.generic.timeSeries.base.new('Delete operations', targets=[ - signals.operations.opCountersDeleteByInstance.asTarget(), - ]) - + g.panel.timeSeries.panelOptions.withDescription('The rate of delete operations this node has received.') - + g.panel.timeSeries.standardOptions.withUnit('ops') - + g.panel.timeSeries.options.tooltip.withMode('multi') - + g.panel.timeSeries.options.tooltip.withSort('desc') - - + g.panel.timeSeries.fieldConfig.defaults.custom.stacking.withMode('normal'), - - // Section 3: Operation Latencies (cluster) - readOperationCount: - commonlib.panels.generic.timeSeries.base.new('Read operation count', targets=[ - signals.operations.opLatenciesReadsOps.asTarget(), - ]) - + g.panel.timeSeries.panelOptions.withDescription('The number of read operations.') - + g.panel.timeSeries.standardOptions.withUnit('ops') - + g.panel.timeSeries.options.tooltip.withMode('multi') - + 
g.panel.timeSeries.options.tooltip.withSort('desc'), - - writeOperationCount: - commonlib.panels.generic.timeSeries.base.new('Write operation count', targets=[ - signals.operations.opLatenciesWritesOps.asTarget(), - ]) - + g.panel.timeSeries.panelOptions.withDescription('The number of write operations.') - + g.panel.timeSeries.standardOptions.withUnit('ops') - + g.panel.timeSeries.options.tooltip.withMode('multi') - + g.panel.timeSeries.options.tooltip.withSort('desc'), - - readOperationLatency: - commonlib.panels.generic.timeSeries.base.new('Read operation latency / $__interval', targets=[ - signals.operations.opLatenciesReadsLatency.asTarget() - + g.query.prometheus.withInterval('2m'), - ]) - + g.panel.timeSeries.panelOptions.withDescription('The latency time for read operations performed by this node.') - + g.panel.timeSeries.standardOptions.withUnit('µs') - + g.panel.timeSeries.options.tooltip.withMode('multi') - + g.panel.timeSeries.options.tooltip.withSort('desc'), - - writeOperationLatency: - commonlib.panels.generic.timeSeries.base.new('Write operation latency / $__interval', targets=[ - signals.operations.opLatenciesWritesLatency.asTarget() - + g.query.prometheus.withInterval('2m'), - ]) - + g.panel.timeSeries.panelOptions.withDescription('The latency time for write operations performed by this node.') - + g.panel.timeSeries.standardOptions.withUnit('µs') - + g.panel.timeSeries.options.tooltip.withMode('multi') - + g.panel.timeSeries.options.tooltip.withSort('desc'), - - // Section 4: Operation Latencies (by instance) - readOperationCountByInstance: - commonlib.panels.generic.timeSeries.base.new('Read operation count', targets=[ - signals.operations.opLatenciesReadsOpsByInstance.asTarget(), - ]) - + g.panel.timeSeries.panelOptions.withDescription('The number of read operations per instance.') - + g.panel.timeSeries.standardOptions.withUnit('ops') - + g.panel.timeSeries.options.tooltip.withMode('multi') - + 
g.panel.timeSeries.options.tooltip.withSort('desc') - - + g.panel.timeSeries.fieldConfig.defaults.custom.stacking.withMode('normal'), - - writeOperationCountByInstance: - commonlib.panels.generic.timeSeries.base.new('Write operation count', targets=[ - signals.operations.opLatenciesWritesOpsByInstance.asTarget(), - ]) - + g.panel.timeSeries.panelOptions.withDescription('The number of write operations per instance.') - + g.panel.timeSeries.standardOptions.withUnit('ops') - + g.panel.timeSeries.options.tooltip.withMode('multi') - + g.panel.timeSeries.options.tooltip.withSort('desc') - - + g.panel.timeSeries.fieldConfig.defaults.custom.stacking.withMode('normal'), - - readOperationLatencyByInstance: - commonlib.panels.generic.timeSeries.base.new('Read operation latency / $__interval', targets=[ - signals.operations.opLatenciesReadsLatencyByInstance.asTarget() - + g.query.prometheus.withInterval('2m'), - ]) - + g.panel.timeSeries.panelOptions.withDescription('The latency time for read operations performed per instance.') - + g.panel.timeSeries.standardOptions.withUnit('µs') - + g.panel.timeSeries.options.tooltip.withMode('multi') - + g.panel.timeSeries.options.tooltip.withSort('desc') - - + g.panel.timeSeries.fieldConfig.defaults.custom.stacking.withMode('normal'), - - writeOperationLatencyByInstance: - commonlib.panels.generic.timeSeries.base.new('Write operation latency / $__interval', targets=[ - signals.operations.opLatenciesWritesLatencyByInstance.asTarget() - + g.query.prometheus.withInterval('2m'), - ]) - + g.panel.timeSeries.panelOptions.withDescription('The latency time for write operations performed per instance.') - + g.panel.timeSeries.standardOptions.withUnit('µs') - + g.panel.timeSeries.options.tooltip.withMode('multi') - + g.panel.timeSeries.options.tooltip.withSort('desc') - - + g.panel.timeSeries.fieldConfig.defaults.custom.stacking.withMode('normal'), - - // Section 5: Average Latencies (calculated) - avgReadLatency: - 
commonlib.panels.generic.timeSeries.base.new('Average read latency / $__interval', targets=[ - signals.operations.avgReadLatency.asTarget() - + g.query.prometheus.withInterval('2m'), - ]) - + g.panel.timeSeries.panelOptions.withDescription('Average latency per read operation.') - + g.panel.timeSeries.standardOptions.withUnit('µs') - + g.panel.timeSeries.options.tooltip.withMode('multi') - + g.panel.timeSeries.options.tooltip.withSort('desc'), - - avgWriteLatency: - commonlib.panels.generic.timeSeries.base.new('Average write latency / $__interval', targets=[ - signals.operations.avgWriteLatency.asTarget() - + g.query.prometheus.withInterval('2m'), - ]) - + g.panel.timeSeries.panelOptions.withDescription('Average latency per write operation.') - + g.panel.timeSeries.standardOptions.withUnit('µs') - + g.panel.timeSeries.options.tooltip.withMode('multi') - + g.panel.timeSeries.options.tooltip.withSort('desc'), - - avgReadLatencyByInstance: - commonlib.panels.generic.timeSeries.base.new('Average read latency / $__interval', targets=[ - signals.operations.avgReadLatencyByInstance.asTarget() - + g.query.prometheus.withInterval('2m'), - ]) - + g.panel.timeSeries.panelOptions.withDescription('Average latency per read operation by instance.') - + g.panel.timeSeries.standardOptions.withUnit('µs') - + g.panel.timeSeries.options.tooltip.withMode('multi') - + g.panel.timeSeries.options.tooltip.withSort('desc'), - - avgWriteLatencyByInstance: - commonlib.panels.generic.timeSeries.base.new('Average write latency / $__interval', targets=[ - signals.operations.avgWriteLatencyByInstance.asTarget() - + g.query.prometheus.withInterval('2m'), - ]) - + g.panel.timeSeries.panelOptions.withDescription('Average latency per write operation by instance.') - + g.panel.timeSeries.standardOptions.withUnit('µs') - + g.panel.timeSeries.options.tooltip.withMode('multi') + g.panel.timeSeries.options.tooltip.withSort('desc'), // Operations dashboard - connections currentConnectionsOperations: - 
commonlib.panels.generic.timeSeries.base.new('Current connections', targets=[ + commonlib.panels.network.timeSeries.base.new('Current connections', targets=[ signals.operations.connectionsCurrent.asTarget(), ]) + g.panel.timeSeries.panelOptions.withDescription('The number of incoming connections from clients to the node.') + g.panel.timeSeries.standardOptions.withUnit('none') - + g.panel.timeSeries.options.tooltip.withMode('multi') + g.panel.timeSeries.options.tooltip.withSort('desc'), activeConnectionsOperations: @@ -687,7 +515,6 @@ local commonlib = import 'common-lib/common/main.libsonnet'; ]) + g.panel.timeSeries.panelOptions.withDescription('The number of connections that currently have operations in progress.') + g.panel.timeSeries.standardOptions.withUnit('none') - + g.panel.timeSeries.options.tooltip.withMode('multi') + g.panel.timeSeries.options.tooltip.withSort('desc'), // Read/write operations (stacked) @@ -698,9 +525,8 @@ local commonlib = import 'common-lib/common/main.libsonnet'; ]) + g.panel.timeSeries.panelOptions.withDescription('The rate of read and write operations performed by the node.') + g.panel.timeSeries.standardOptions.withUnit('ops') - + g.panel.timeSeries.options.tooltip.withMode('multi') - + g.panel.timeSeries.options.tooltip.withSort('desc') - + g.panel.timeSeries.fieldConfig.defaults.custom.stacking.withMode('normal'), + + g.panel.timeSeries.options.legend.withPlacement('right') + + g.panel.timeSeries.options.legend.withAsTable(true), readwriteLatencyOperations: commonlib.panels.generic.timeSeries.base.new('Read and write latency / $__interval', targets=[ @@ -711,9 +537,8 @@ local commonlib = import 'common-lib/common/main.libsonnet'; ]) + g.panel.timeSeries.panelOptions.withDescription('The latency time for read and write operations performed by this node.') + g.panel.timeSeries.standardOptions.withUnit('µs') - + g.panel.timeSeries.options.tooltip.withMode('multi') - + g.panel.timeSeries.options.tooltip.withSort('desc') - + 
g.panel.timeSeries.fieldConfig.defaults.custom.stacking.withMode('normal'), + + g.panel.timeSeries.options.legend.withPlacement('right') + + g.panel.timeSeries.options.legend.withAsTable(true), // Operations dashboard - database locks databaseDeadlocksOperations: @@ -729,9 +554,7 @@ local commonlib = import 'common-lib/common/main.libsonnet'; ]) + g.panel.timeSeries.panelOptions.withDescription('The number of deadlocks that have occurred for the database lock.') + g.panel.timeSeries.standardOptions.withUnit('none') - + g.panel.timeSeries.options.tooltip.withMode('multi') - + g.panel.timeSeries.options.tooltip.withSort('desc') - + g.panel.timeSeries.fieldConfig.defaults.custom.stacking.withMode('normal'), + + g.panel.timeSeries.options.tooltip.withSort('desc'), databaseWaitCountOperations: commonlib.panels.generic.timeSeries.base.new('Database wait count / $__interval', targets=[ @@ -746,7 +569,6 @@ local commonlib = import 'common-lib/common/main.libsonnet'; ]) + g.panel.timeSeries.panelOptions.withDescription('The number of database lock acquisitions that had to wait.') + g.panel.timeSeries.standardOptions.withUnit('none') - + g.panel.timeSeries.options.tooltip.withMode('multi') + g.panel.timeSeries.options.tooltip.withSort('desc') + g.panel.timeSeries.fieldConfig.defaults.custom.stacking.withMode('normal'), @@ -763,7 +585,6 @@ local commonlib = import 'common-lib/common/main.libsonnet'; ]) + g.panel.timeSeries.panelOptions.withDescription('The time spent waiting for the database lock acquisition.') + g.panel.timeSeries.standardOptions.withUnit('µs') - + g.panel.timeSeries.options.tooltip.withMode('multi') + g.panel.timeSeries.options.tooltip.withSort('desc') + g.panel.timeSeries.fieldConfig.defaults.custom.stacking.withMode('normal'), @@ -781,7 +602,6 @@ local commonlib = import 'common-lib/common/main.libsonnet'; ]) + g.panel.timeSeries.panelOptions.withDescription('The number of deadlocks that have occurred for the collection lock.') + 
g.panel.timeSeries.standardOptions.withUnit('none') - + g.panel.timeSeries.options.tooltip.withMode('multi') + g.panel.timeSeries.options.tooltip.withSort('desc') + g.panel.timeSeries.fieldConfig.defaults.custom.stacking.withMode('normal'), @@ -798,7 +618,6 @@ local commonlib = import 'common-lib/common/main.libsonnet'; ]) + g.panel.timeSeries.panelOptions.withDescription('The number of collection lock acquisitions that had to wait.') + g.panel.timeSeries.standardOptions.withUnit('none') - + g.panel.timeSeries.options.tooltip.withMode('multi') + g.panel.timeSeries.options.tooltip.withSort('desc') + g.panel.timeSeries.fieldConfig.defaults.custom.stacking.withMode('normal'), @@ -815,7 +634,6 @@ local commonlib = import 'common-lib/common/main.libsonnet'; ]) + g.panel.timeSeries.panelOptions.withDescription('The time spent waiting for the collection lock acquisition.') + g.panel.timeSeries.standardOptions.withUnit('µs') - + g.panel.timeSeries.options.tooltip.withMode('multi') + g.panel.timeSeries.options.tooltip.withSort('desc') + g.panel.timeSeries.fieldConfig.defaults.custom.stacking.withMode('normal'), @@ -831,8 +649,8 @@ local commonlib = import 'common-lib/common/main.libsonnet'; ]) + g.panel.timeSeries.panelOptions.withDescription('The amount of RAM and virtual memory being used by the database process.') + g.panel.timeSeries.standardOptions.withUnit('mbytes') - + g.panel.timeSeries.options.tooltip.withMode('multi') - + g.panel.timeSeries.options.tooltip.withSort('desc'), + + g.panel.timeSeries.options.legend.withPlacement('right') + + g.panel.timeSeries.options.legend.withAsTable(true), hardwareCPUInterruptServiceTimePerformance: commonlib.panels.generic.timeSeries.base.new('Hardware CPU interrupt service time / $__interval', targets=[ @@ -841,18 +659,18 @@ local commonlib = import 'common-lib/common/main.libsonnet'; ]) + g.panel.timeSeries.panelOptions.withDescription('The amount of time spent servicing CPU interrupts.') + 
g.panel.timeSeries.standardOptions.withUnit('ms') - + g.panel.timeSeries.options.tooltip.withMode('multi') + g.panel.timeSeries.options.tooltip.withSort('desc'), diskSpacePerformance: - commonlib.panels.generic.timeSeries.base.new('Disk space', targets=[ + commonlib.panels.disk.timeSeries.usage.new('Disk space', targets=[ signals.performance.diskSpaceFree.asTarget(), signals.performance.diskSpaceUsed.asTarget(), ]) + g.panel.timeSeries.panelOptions.withDescription("The amount of free and used disk space on this node's hardware.") + g.panel.timeSeries.standardOptions.withUnit('decbytes') - + g.panel.timeSeries.options.tooltip.withMode('multi') - + g.panel.timeSeries.options.tooltip.withSort('desc'), + + g.panel.timeSeries.options.tooltip.withSort('desc') + + g.panel.timeSeries.options.legend.withPlacement('right') + + g.panel.timeSeries.options.legend.withAsTable(true), diskSpaceUtilizationPerformance: commonlib.panels.generic.timeSeries.base.new('Disk space utilization', targets=[ @@ -862,7 +680,6 @@ local commonlib = import 'common-lib/common/main.libsonnet'; + g.panel.timeSeries.standardOptions.withUnit('percentunit') + g.panel.timeSeries.standardOptions.withMin(0) + g.panel.timeSeries.standardOptions.withMax(1) - + g.panel.timeSeries.options.tooltip.withMode('multi') + g.panel.timeSeries.options.tooltip.withSort('desc'), networkRequestsPerformance: @@ -871,18 +688,18 @@ local commonlib = import 'common-lib/common/main.libsonnet'; ]) + g.panel.timeSeries.panelOptions.withDescription('The rate of distinct requests the node has received.') + g.panel.timeSeries.standardOptions.withUnit('reqps') - + g.panel.timeSeries.options.tooltip.withMode('multi') + g.panel.timeSeries.options.tooltip.withSort('desc'), slowNetworkRequestsPerformance: - commonlib.panels.generic.timeSeries.base.new('Slow network requests', targets=[ + commonlib.panels.network.timeSeries.traffic.new('Slow network requests', targets=[ signals.performance.networkSlowDNSByInstance.asTarget(), 
signals.performance.networkSlowSSLByInstance.asTarget(), ]) + g.panel.timeSeries.panelOptions.withDescription('The rate of slow DNS and SSL operations received by this node.') + g.panel.timeSeries.standardOptions.withUnit('reqps') - + g.panel.timeSeries.options.tooltip.withMode('multi') - + g.panel.timeSeries.options.tooltip.withSort('desc'), + + g.panel.timeSeries.options.tooltip.withSort('desc') + + g.panel.timeSeries.options.legend.withPlacement('right') + + g.panel.timeSeries.options.legend.withAsTable(true), networkThroughputPerformance: commonlib.panels.generic.timeSeries.base.new('Network throughput', targets=[ @@ -891,20 +708,20 @@ local commonlib = import 'common-lib/common/main.libsonnet'; ]) + g.panel.timeSeries.panelOptions.withDescription('The rate of bytes sent and received by the node over a network connection.') + g.panel.timeSeries.standardOptions.withUnit('Bps') - + g.panel.timeSeries.options.tooltip.withMode('multi') + g.panel.timeSeries.options.tooltip.withSort('desc') - + g.panel.timeSeries.fieldConfig.defaults.custom.stacking.withMode('normal'), + + g.panel.timeSeries.options.legend.withPlacement('right') + + g.panel.timeSeries.options.legend.withAsTable(true), hardwareIOPerformance: - commonlib.panels.generic.timeSeries.base.new('Hardware I/O', targets=[ + commonlib.panels.disk.timeSeries.iops.new('Hardware I/O', targets=[ signals.performance.diskReadCountByInstance.asTarget(), signals.performance.diskWriteCountByInstance.asTarget(), ]) + g.panel.timeSeries.panelOptions.withDescription("The rate of read and write I/O's processed by this node.") + g.panel.timeSeries.standardOptions.withUnit('iops') - + g.panel.timeSeries.options.tooltip.withMode('multi') + g.panel.timeSeries.options.tooltip.withSort('desc') - + g.panel.timeSeries.fieldConfig.defaults.custom.stacking.withMode('normal'), + + g.panel.timeSeries.options.legend.withPlacement('right') + + g.panel.timeSeries.options.legend.withAsTable(true), hardwareIOWaitTimePerformance: 
commonlib.panels.generic.timeSeries.base.new('Hardware I/O wait time / $__interval', targets=[ @@ -915,35 +732,32 @@ local commonlib = import 'common-lib/common/main.libsonnet'; ]) + g.panel.timeSeries.panelOptions.withDescription("The amount of time the node has spent waiting for read and write I/O's to process.") + g.panel.timeSeries.standardOptions.withUnit('ms') - + g.panel.timeSeries.options.tooltip.withMode('multi') + g.panel.timeSeries.options.tooltip.withSort('desc') - + g.panel.timeSeries.fieldConfig.defaults.custom.stacking.withMode('normal'), + + g.panel.timeSeries.options.legend.withPlacement('right') + + g.panel.timeSeries.options.legend.withAsTable(true), - // Section 1: Connection Metrics currentConnections: - commonlib.panels.generic.timeSeries.base.new('Current connections', targets=[ + commonlib.panels.network.timeSeries.base.new('Current connections', targets=[ signals.performance.connectionsCurrent.asTarget(), ]) + g.panel.timeSeries.panelOptions.withDescription('The current number of active connections.') - + g.panel.timeSeries.options.tooltip.withMode('multi') + + g.panel.timeSeries.standardOptions.withUnit('conn') + g.panel.timeSeries.options.tooltip.withSort('desc'), activeConnections: - commonlib.panels.generic.timeSeries.base.new('Active connections', targets=[ + commonlib.panels.network.timeSeries.base.new('Active connections', targets=[ signals.performance.connectionsActive.asTarget(), ]) + g.panel.timeSeries.panelOptions.withDescription('The current number of connections with operations in progress.') - + g.panel.timeSeries.options.tooltip.withMode('multi') + + g.panel.timeSeries.standardOptions.withUnit('conn') + g.panel.timeSeries.options.tooltip.withSort('desc'), - // Section 2: Database Lock Deadlocks (Cluster) dbLockDeadlocksExclusive: commonlib.panels.generic.timeSeries.base.new('Database exclusive lock deadlocks / $__interval', targets=[ signals.performance.dbDeadlockExclusive.asTarget() + g.query.prometheus.withInterval('2m'), 
]) + g.panel.timeSeries.panelOptions.withDescription('The number of database exclusive lock deadlocks.') - + g.panel.timeSeries.options.tooltip.withMode('multi') + g.panel.timeSeries.options.tooltip.withSort('desc'), dbLockDeadlocksIntentExclusive: @@ -952,7 +766,6 @@ local commonlib = import 'common-lib/common/main.libsonnet'; + g.query.prometheus.withInterval('2m'), ]) + g.panel.timeSeries.panelOptions.withDescription('The number of database intent-exclusive lock deadlocks.') - + g.panel.timeSeries.options.tooltip.withMode('multi') + g.panel.timeSeries.options.tooltip.withSort('desc'), dbLockDeadlocksShared: @@ -961,7 +774,6 @@ local commonlib = import 'common-lib/common/main.libsonnet'; + g.query.prometheus.withInterval('2m'), ]) + g.panel.timeSeries.panelOptions.withDescription('The number of database shared lock deadlocks.') - + g.panel.timeSeries.options.tooltip.withMode('multi') + g.panel.timeSeries.options.tooltip.withSort('desc'), dbLockDeadlocksIntentShared: @@ -970,7 +782,6 @@ local commonlib = import 'common-lib/common/main.libsonnet'; + g.query.prometheus.withInterval('2m'), ]) + g.panel.timeSeries.panelOptions.withDescription('The number of database intent-shared lock deadlocks.') - + g.panel.timeSeries.options.tooltip.withMode('multi') + g.panel.timeSeries.options.tooltip.withSort('desc'), // Section 3: Database Lock Deadlocks (By Instance) @@ -980,7 +791,6 @@ local commonlib = import 'common-lib/common/main.libsonnet'; + g.query.prometheus.withInterval('2m'), ]) + g.panel.timeSeries.panelOptions.withDescription('The number of database exclusive lock deadlocks per instance.') - + g.panel.timeSeries.options.tooltip.withMode('multi') + g.panel.timeSeries.options.tooltip.withSort('desc') + g.panel.timeSeries.fieldConfig.defaults.custom.stacking.withMode('normal'), @@ -990,7 +800,6 @@ local commonlib = import 'common-lib/common/main.libsonnet'; + g.query.prometheus.withInterval('2m'), ]) + g.panel.timeSeries.panelOptions.withDescription('The number of 
database intent-exclusive lock deadlocks per instance.') - + g.panel.timeSeries.options.tooltip.withMode('multi') + g.panel.timeSeries.options.tooltip.withSort('desc') + g.panel.timeSeries.fieldConfig.defaults.custom.stacking.withMode('normal'), @@ -1000,7 +809,6 @@ local commonlib = import 'common-lib/common/main.libsonnet'; + g.query.prometheus.withInterval('2m'), ]) + g.panel.timeSeries.panelOptions.withDescription('The number of database shared lock deadlocks per instance.') - + g.panel.timeSeries.options.tooltip.withMode('multi') + g.panel.timeSeries.options.tooltip.withSort('desc') + g.panel.timeSeries.fieldConfig.defaults.custom.stacking.withMode('normal'), @@ -1010,7 +818,6 @@ local commonlib = import 'common-lib/common/main.libsonnet'; + g.query.prometheus.withInterval('2m'), ]) + g.panel.timeSeries.panelOptions.withDescription('The number of database intent-shared lock deadlocks per instance.') - + g.panel.timeSeries.options.tooltip.withMode('multi') + g.panel.timeSeries.options.tooltip.withSort('desc') + g.panel.timeSeries.fieldConfig.defaults.custom.stacking.withMode('normal'), @@ -1021,7 +828,6 @@ local commonlib = import 'common-lib/common/main.libsonnet'; + g.query.prometheus.withInterval('2m'), ]) + g.panel.timeSeries.panelOptions.withDescription('The number of times lock acquisitions encounter waits for database exclusive locks.') - + g.panel.timeSeries.options.tooltip.withMode('multi') + g.panel.timeSeries.options.tooltip.withSort('desc'), dbLockWaitCountIntentExclusive: @@ -1030,7 +836,6 @@ local commonlib = import 'common-lib/common/main.libsonnet'; + g.query.prometheus.withInterval('2m'), ]) + g.panel.timeSeries.panelOptions.withDescription('The number of times lock acquisitions encounter waits for database intent-exclusive locks.') - + g.panel.timeSeries.options.tooltip.withMode('multi') + g.panel.timeSeries.options.tooltip.withSort('desc'), dbLockWaitCountShared: @@ -1039,7 +844,6 @@ local commonlib = import 
'common-lib/common/main.libsonnet'; + g.query.prometheus.withInterval('2m'), ]) + g.panel.timeSeries.panelOptions.withDescription('The number of times lock acquisitions encounter waits for database shared locks.') - + g.panel.timeSeries.options.tooltip.withMode('multi') + g.panel.timeSeries.options.tooltip.withSort('desc'), dbLockWaitCountIntentShared: @@ -1048,7 +852,6 @@ local commonlib = import 'common-lib/common/main.libsonnet'; + g.query.prometheus.withInterval('2m'), ]) + g.panel.timeSeries.panelOptions.withDescription('The number of times lock acquisitions encounter waits for database intent-shared locks.') - + g.panel.timeSeries.options.tooltip.withMode('multi') + g.panel.timeSeries.options.tooltip.withSort('desc'), // Section 5: Database Lock Wait Counts (By Instance) @@ -1058,7 +861,6 @@ local commonlib = import 'common-lib/common/main.libsonnet'; + g.query.prometheus.withInterval('2m'), ]) + g.panel.timeSeries.panelOptions.withDescription('The number of times lock acquisitions encounter waits for database exclusive locks per instance.') - + g.panel.timeSeries.options.tooltip.withMode('multi') + g.panel.timeSeries.options.tooltip.withSort('desc') + g.panel.timeSeries.fieldConfig.defaults.custom.stacking.withMode('normal'), @@ -1068,9 +870,7 @@ local commonlib = import 'common-lib/common/main.libsonnet'; + g.query.prometheus.withInterval('2m'), ]) + g.panel.timeSeries.panelOptions.withDescription('The number of times lock acquisitions encounter waits for database intent-exclusive locks per instance.') - + g.panel.timeSeries.options.tooltip.withMode('multi') + g.panel.timeSeries.options.tooltip.withSort('desc') - + g.panel.timeSeries.fieldConfig.defaults.custom.stacking.withMode('normal'), dbLockWaitCountSharedByInstance: @@ -1079,7 +879,6 @@ local commonlib = import 'common-lib/common/main.libsonnet'; + g.query.prometheus.withInterval('2m'), ]) + g.panel.timeSeries.panelOptions.withDescription('The number of times lock acquisitions encounter waits for 
database shared locks per instance.') - + g.panel.timeSeries.options.tooltip.withMode('multi') + g.panel.timeSeries.options.tooltip.withSort('desc') + g.panel.timeSeries.fieldConfig.defaults.custom.stacking.withMode('normal'), @@ -1089,9 +888,7 @@ local commonlib = import 'common-lib/common/main.libsonnet'; + g.query.prometheus.withInterval('2m'), ]) + g.panel.timeSeries.panelOptions.withDescription('The number of times lock acquisitions encounter waits for database intent-shared locks per instance.') - + g.panel.timeSeries.options.tooltip.withMode('multi') + g.panel.timeSeries.options.tooltip.withSort('desc') - + g.panel.timeSeries.fieldConfig.defaults.custom.stacking.withMode('normal'), // Section 6: Database Lock Acquisition Time @@ -1102,9 +899,7 @@ local commonlib = import 'common-lib/common/main.libsonnet'; ]) + g.panel.timeSeries.panelOptions.withDescription('The time spent acquiring database exclusive locks.') + g.panel.timeSeries.standardOptions.withUnit('µs') - + g.panel.timeSeries.options.tooltip.withMode('multi') + g.panel.timeSeries.options.tooltip.withSort('desc') - + g.panel.timeSeries.fieldConfig.defaults.custom.stacking.withMode('normal'), dbLockAcqTimeIntentExclusive: @@ -1114,7 +909,6 @@ local commonlib = import 'common-lib/common/main.libsonnet'; ]) + g.panel.timeSeries.panelOptions.withDescription('The time spent acquiring database intent-exclusive locks.') + g.panel.timeSeries.standardOptions.withUnit('µs') - + g.panel.timeSeries.options.tooltip.withMode('multi') + g.panel.timeSeries.options.tooltip.withSort('desc') + g.panel.timeSeries.fieldConfig.defaults.custom.stacking.withMode('normal'), @@ -1125,7 +919,6 @@ local commonlib = import 'common-lib/common/main.libsonnet'; ]) + g.panel.timeSeries.panelOptions.withDescription('The time spent acquiring database shared locks.') + g.panel.timeSeries.standardOptions.withUnit('µs') - + g.panel.timeSeries.options.tooltip.withMode('multi') + g.panel.timeSeries.options.tooltip.withSort('desc') + 
g.panel.timeSeries.fieldConfig.defaults.custom.stacking.withMode('normal'), @@ -1136,9 +929,7 @@ local commonlib = import 'common-lib/common/main.libsonnet'; ]) + g.panel.timeSeries.panelOptions.withDescription('The time spent acquiring database intent-shared locks.') + g.panel.timeSeries.standardOptions.withUnit('µs') - + g.panel.timeSeries.options.tooltip.withMode('multi') + g.panel.timeSeries.options.tooltip.withSort('desc') - + g.panel.timeSeries.fieldConfig.defaults.custom.stacking.withMode('normal'), // Section 7: Collection Lock Deadlocks @@ -1148,7 +939,6 @@ local commonlib = import 'common-lib/common/main.libsonnet'; + g.query.prometheus.withInterval('2m'), ]) + g.panel.timeSeries.panelOptions.withDescription('The number of collection exclusive lock deadlocks.') - + g.panel.timeSeries.options.tooltip.withMode('multi') + g.panel.timeSeries.options.tooltip.withSort('desc') + g.panel.timeSeries.fieldConfig.defaults.custom.stacking.withMode('normal'), @@ -1158,7 +948,6 @@ local commonlib = import 'common-lib/common/main.libsonnet'; + g.query.prometheus.withInterval('2m'), ]) + g.panel.timeSeries.panelOptions.withDescription('The number of collection intent-exclusive lock deadlocks.') - + g.panel.timeSeries.options.tooltip.withMode('multi') + g.panel.timeSeries.options.tooltip.withSort('desc') + g.panel.timeSeries.fieldConfig.defaults.custom.stacking.withMode('normal'), @@ -1168,7 +957,6 @@ local commonlib = import 'common-lib/common/main.libsonnet'; + g.query.prometheus.withInterval('2m'), ]) + g.panel.timeSeries.panelOptions.withDescription('The number of collection shared lock deadlocks.') - + g.panel.timeSeries.options.tooltip.withMode('multi') + g.panel.timeSeries.options.tooltip.withSort('desc') + g.panel.timeSeries.fieldConfig.defaults.custom.stacking.withMode('normal'), @@ -1178,7 +966,6 @@ local commonlib = import 'common-lib/common/main.libsonnet'; + g.query.prometheus.withInterval('2m'), ]) + g.panel.timeSeries.panelOptions.withDescription('The 
number of collection intent-shared lock deadlocks.') - + g.panel.timeSeries.options.tooltip.withMode('multi') + g.panel.timeSeries.options.tooltip.withSort('desc') + g.panel.timeSeries.fieldConfig.defaults.custom.stacking.withMode('normal'), @@ -1189,7 +976,6 @@ local commonlib = import 'common-lib/common/main.libsonnet'; + g.query.prometheus.withInterval('2m'), ]) + g.panel.timeSeries.panelOptions.withDescription('The number of times lock acquisitions encounter waits for collection exclusive locks.') - + g.panel.timeSeries.options.tooltip.withMode('multi') + g.panel.timeSeries.options.tooltip.withSort('desc') + g.panel.timeSeries.fieldConfig.defaults.custom.stacking.withMode('normal'), @@ -1199,9 +985,7 @@ local commonlib = import 'common-lib/common/main.libsonnet'; + g.query.prometheus.withInterval('2m'), ]) + g.panel.timeSeries.panelOptions.withDescription('The number of times lock acquisitions encounter waits for collection intent-exclusive locks.') - + g.panel.timeSeries.options.tooltip.withMode('multi') + g.panel.timeSeries.options.tooltip.withSort('desc') - + g.panel.timeSeries.fieldConfig.defaults.custom.stacking.withMode('normal'), collLockWaitCountShared: @@ -1210,9 +994,7 @@ local commonlib = import 'common-lib/common/main.libsonnet'; + g.query.prometheus.withInterval('2m'), ]) + g.panel.timeSeries.panelOptions.withDescription('The number of times lock acquisitions encounter waits for collection shared locks.') - + g.panel.timeSeries.options.tooltip.withMode('multi') + g.panel.timeSeries.options.tooltip.withSort('desc') - + g.panel.timeSeries.fieldConfig.defaults.custom.stacking.withMode('normal'), collLockWaitCountIntentShared: @@ -1221,9 +1003,7 @@ local commonlib = import 'common-lib/common/main.libsonnet'; + g.query.prometheus.withInterval('2m'), ]) + g.panel.timeSeries.panelOptions.withDescription('The number of times lock acquisitions encounter waits for collection intent-shared locks.') - + g.panel.timeSeries.options.tooltip.withMode('multi') + 
g.panel.timeSeries.options.tooltip.withSort('desc') - + g.panel.timeSeries.fieldConfig.defaults.custom.stacking.withMode('normal'), // Section 9: Collection Lock Acquisition Time @@ -1234,9 +1014,7 @@ local commonlib = import 'common-lib/common/main.libsonnet'; ]) + g.panel.timeSeries.panelOptions.withDescription('The time spent acquiring collection exclusive locks.') + g.panel.timeSeries.standardOptions.withUnit('µs') - + g.panel.timeSeries.options.tooltip.withMode('multi') + g.panel.timeSeries.options.tooltip.withSort('desc') - + g.panel.timeSeries.fieldConfig.defaults.custom.stacking.withMode('normal'), collLockAcqTimeIntentExclusive: @@ -1246,9 +1024,7 @@ local commonlib = import 'common-lib/common/main.libsonnet'; ]) + g.panel.timeSeries.panelOptions.withDescription('The time spent acquiring collection intent-exclusive locks.') + g.panel.timeSeries.standardOptions.withUnit('µs') - + g.panel.timeSeries.options.tooltip.withMode('multi') + g.panel.timeSeries.options.tooltip.withSort('desc') - + g.panel.timeSeries.fieldConfig.defaults.custom.stacking.withMode('normal'), collLockAcqTimeShared: @@ -1258,9 +1034,7 @@ local commonlib = import 'common-lib/common/main.libsonnet'; ]) + g.panel.timeSeries.panelOptions.withDescription('The time spent acquiring collection shared locks.') + g.panel.timeSeries.standardOptions.withUnit('µs') - + g.panel.timeSeries.options.tooltip.withMode('multi') + g.panel.timeSeries.options.tooltip.withSort('desc') - + g.panel.timeSeries.fieldConfig.defaults.custom.stacking.withMode('normal'), collLockAcqTimeIntentShared: @@ -1270,16 +1044,13 @@ local commonlib = import 'common-lib/common/main.libsonnet'; ]) + g.panel.timeSeries.panelOptions.withDescription('The time spent acquiring collection intent-shared locks.') + g.panel.timeSeries.standardOptions.withUnit('µs') - + g.panel.timeSeries.options.tooltip.withMode('multi') + g.panel.timeSeries.options.tooltip.withSort('desc') - + 
g.panel.timeSeries.fieldConfig.defaults.custom.stacking.withMode('normal'), // // Sharding Overview dashboard panels // - // General sharding statistics staleConfigErrors: commonlib.panels.generic.timeSeries.base.new('Stale configs / $__interval', targets=[ signals.sharding.staleConfigErrors.asTarget() @@ -1287,7 +1058,6 @@ local commonlib = import 'common-lib/common/main.libsonnet'; ]) + g.panel.timeSeries.panelOptions.withDescription('Number of times that a thread hit a stale config exception and triggered a metadata refresh.') + g.panel.timeSeries.standardOptions.withUnit('none') - + g.panel.timeSeries.options.tooltip.withMode('multi') + g.panel.timeSeries.options.tooltip.withSort('desc'), chunkMigrations: @@ -1297,7 +1067,6 @@ local commonlib = import 'common-lib/common/main.libsonnet'; ]) + g.panel.timeSeries.panelOptions.withDescription('Chunk migration frequency for this node.') + g.panel.timeSeries.standardOptions.withUnit('none') - + g.panel.timeSeries.options.tooltip.withMode('multi') + g.panel.timeSeries.options.tooltip.withSort('desc'), docsCloned: @@ -1309,7 +1078,6 @@ local commonlib = import 'common-lib/common/main.libsonnet'; ]) + g.panel.timeSeries.panelOptions.withDescription('The number of documents cloned on this node when it acted as primary for the donor and acted as primary for the recipient.') + g.panel.timeSeries.standardOptions.withUnit('none') - + g.panel.timeSeries.options.tooltip.withMode('multi') + g.panel.timeSeries.options.tooltip.withSort('desc') + g.panel.timeSeries.fieldConfig.defaults.custom.stacking.withMode('normal'), @@ -1320,7 +1088,6 @@ local commonlib = import 'common-lib/common/main.libsonnet'; ]) + g.panel.timeSeries.panelOptions.withDescription('The time taken by the catch-up and update metadata phases of a range migration, by this node.') + g.panel.timeSeries.standardOptions.withUnit('ms') - + g.panel.timeSeries.options.tooltip.withMode('multi') + g.panel.timeSeries.options.tooltip.withSort('desc'), // Catalog cache 
panels @@ -1333,7 +1100,6 @@ local commonlib = import 'common-lib/common/main.libsonnet'; ]) + g.panel.timeSeries.panelOptions.withDescription('The number of incremental and full refreshes that have started.') + g.panel.timeSeries.standardOptions.withUnit('none') - + g.panel.timeSeries.options.tooltip.withMode('multi') + g.panel.timeSeries.options.tooltip.withSort('desc') + g.panel.timeSeries.fieldConfig.defaults.custom.stacking.withMode('normal'), @@ -1344,7 +1110,6 @@ local commonlib = import 'common-lib/common/main.libsonnet'; ]) + g.panel.timeSeries.panelOptions.withDescription('The number of full and incremental refreshes that have failed.') + g.panel.timeSeries.standardOptions.withUnit('none') - + g.panel.timeSeries.options.tooltip.withMode('multi') + g.panel.timeSeries.options.tooltip.withSort('desc'), catalogCacheStaleConfigs: @@ -1354,7 +1119,6 @@ local commonlib = import 'common-lib/common/main.libsonnet'; ]) + g.panel.timeSeries.panelOptions.withDescription('The number of times that a thread hit a stale config exception for the catalog cache and triggered a metadata refresh.') + g.panel.timeSeries.standardOptions.withUnit('none') - + g.panel.timeSeries.options.tooltip.withMode('multi') + g.panel.timeSeries.options.tooltip.withSort('desc'), catalogCacheEntries: @@ -1366,7 +1130,6 @@ local commonlib = import 'common-lib/common/main.libsonnet'; ]) + g.panel.timeSeries.panelOptions.withDescription('The number of database and collection entries that are currently in the catalog cache.') + g.panel.timeSeries.standardOptions.withUnit('none') - + g.panel.timeSeries.options.tooltip.withMode('multi') + g.panel.timeSeries.options.tooltip.withSort('desc'), catalogCacheRefreshTime: @@ -1376,7 +1139,6 @@ local commonlib = import 'common-lib/common/main.libsonnet'; ]) + g.panel.timeSeries.panelOptions.withDescription('The amount of time that threads had to wait for a refresh of the metadata.') + g.panel.timeSeries.standardOptions.withUnit('µs') - + 
g.panel.timeSeries.options.tooltip.withMode('multi') + g.panel.timeSeries.options.tooltip.withSort('desc'), catalogCacheOperationsBlocked: @@ -1385,7 +1147,6 @@ local commonlib = import 'common-lib/common/main.libsonnet'; ]) + g.panel.timeSeries.panelOptions.withDescription('The rate of operations that are blocked by a refresh of the catalog cache. Specific to mongos nodes found under replica set "none".') + g.panel.timeSeries.standardOptions.withUnit('ops') - + g.panel.timeSeries.options.tooltip.withMode('multi') + g.panel.timeSeries.options.tooltip.withSort('desc'), // Shard targeting operations panels @@ -1400,7 +1161,6 @@ local commonlib = import 'common-lib/common/main.libsonnet'; + g.panel.timeSeries.panelOptions.withDescription('The rate of CRUD operations and aggregation commands run that targeted all shards. Specific to mongos nodes found under replica set "none".') + g.panel.timeSeries.standardOptions.withUnit('ops') + g.panel.timeSeries.options.legend.withPlacement('right') - + g.panel.timeSeries.options.tooltip.withMode('multi') + g.panel.timeSeries.options.tooltip.withSort('desc'), shardTargetingManyShards: @@ -1414,7 +1174,6 @@ local commonlib = import 'common-lib/common/main.libsonnet'; + g.panel.timeSeries.panelOptions.withDescription('The rate of CRUD operations and aggregation commands run that targeted more than 1 shard. Specific to mongos nodes found under replica set "none".') + g.panel.timeSeries.standardOptions.withUnit('ops') + g.panel.timeSeries.options.legend.withPlacement('right') - + g.panel.timeSeries.options.tooltip.withMode('multi') + g.panel.timeSeries.options.tooltip.withSort('desc'), shardTargetingOneShard: @@ -1428,7 +1187,6 @@ local commonlib = import 'common-lib/common/main.libsonnet'; + g.panel.timeSeries.panelOptions.withDescription('The rate of CRUD operations and aggregation commands run that targeted 1 shard. 
Specific to mongos nodes found under replica set "none".') + g.panel.timeSeries.standardOptions.withUnit('ops') + g.panel.timeSeries.options.legend.withPlacement('right') - + g.panel.timeSeries.options.tooltip.withMode('multi') + g.panel.timeSeries.options.tooltip.withSort('desc'), shardTargetingUnsharded: @@ -1442,7 +1200,6 @@ local commonlib = import 'common-lib/common/main.libsonnet'; + g.panel.timeSeries.panelOptions.withDescription('The rate of CRUD operations and aggregation commands run on an unsharded collection. Specific to mongos nodes found under replica set "none".') + g.panel.timeSeries.standardOptions.withUnit('ops') + g.panel.timeSeries.options.legend.withPlacement('right') - + g.panel.timeSeries.options.tooltip.withMode('multi') + g.panel.timeSeries.options.tooltip.withSort('desc'), }, } diff --git a/mongodb-atlas-mixin/rows.libsonnet b/mongodb-atlas-mixin/rows.libsonnet index 901a4cf5b..bfcab2d5a 100644 --- a/mongodb-atlas-mixin/rows.libsonnet +++ b/mongodb-atlas-mixin/rows.libsonnet @@ -94,26 +94,16 @@ local g = import './g.libsonnet'; // Operations overview dashboard rows // - operationsCountersClusterRow: - g.panel.row.new('Operation counters - cluster') + operationsRow: + g.panel.row.new('Operations') + g.panel.row.withCollapsed(false) + g.panel.row.withPanels([ - panels.insertOperations { gridPos+: { w: 12 } }, panels.queryOperations { gridPos+: { w: 12 } }, + panels.insertOperations { gridPos+: { w: 12 } }, panels.updateOperations { gridPos+: { w: 12 } }, panels.deleteOperations { gridPos+: { w: 12 } }, ]), - operationsCountersInstanceRow: - g.panel.row.new('Operation counters - instance') - + g.panel.row.withCollapsed(false) - + g.panel.row.withPanels([ - panels.insertOperationsByInstance { gridPos+: { w: 12 } }, - panels.queryOperationsByInstance { gridPos+: { w: 12 } }, - panels.updateOperationsByInstance { gridPos+: { w: 12 } }, - panels.deleteOperationsByInstance { gridPos+: { w: 12 } }, - ]), - operationsConnectionsRow: 
g.panel.row.new('Connections') + g.panel.row.withCollapsed(false) @@ -142,36 +132,6 @@ local g = import './g.libsonnet'; panels.collectionWaitTimeOperations { gridPos+: { w: 8 } }, ]), - operationsLatenciesClusterRow: - g.panel.row.new('Operation latencies - cluster') - + g.panel.row.withCollapsed(false) - + g.panel.row.withPanels([ - panels.readOperationCount { gridPos+: { w: 12 } }, - panels.writeOperationCount { gridPos+: { w: 12 } }, - panels.readOperationLatency { gridPos+: { w: 12 } }, - panels.writeOperationLatency { gridPos+: { w: 12 } }, - ]), - - operationsLatenciesInstanceRow: - g.panel.row.new('Operation latencies - instance') - + g.panel.row.withCollapsed(false) - + g.panel.row.withPanels([ - panels.readOperationCountByInstance { gridPos+: { w: 12 } }, - panels.writeOperationCountByInstance { gridPos+: { w: 12 } }, - panels.readOperationLatencyByInstance { gridPos+: { w: 12 } }, - panels.writeOperationLatencyByInstance { gridPos+: { w: 12 } }, - ]), - - operationsAvgLatenciesRow: - g.panel.row.new('Average latencies') - + g.panel.row.withCollapsed(false) - + g.panel.row.withPanels([ - panels.avgReadLatency { gridPos+: { w: 12 } }, - panels.avgWriteLatency { gridPos+: { w: 12 } }, - panels.avgReadLatencyByInstance { gridPos+: { w: 12 } }, - panels.avgWriteLatencyByInstance { gridPos+: { w: 12 } }, - ]), - // // Performance overview dashboard rows // diff --git a/mongodb-atlas-mixin/signals/cluster.libsonnet b/mongodb-atlas-mixin/signals/cluster.libsonnet index e8fe04716..12df3b05f 100644 --- a/mongodb-atlas-mixin/signals/cluster.libsonnet +++ b/mongodb-atlas-mixin/signals/cluster.libsonnet @@ -17,7 +17,7 @@ function(this) unit: 'none', sources: { prometheus: { - expr: 'mongodb_network_bytesIn{%(queriesSelector)s, rs_nm=~"$rs_nm"}', // representative metric for a table + expr: 'mongodb_network_bytesIn{%(queriesSelector)s, rs_nm=~"$rs_nm", cl_role="shardsvr"}', // representative metric for a table legendCustomTemplate: '', }, }, @@ -31,7 +31,7 @@ 
function(this) unit: 'none', sources: { prometheus: { - expr: 'mongodb_network_bytesIn{%(queriesSelector)s, rs_nm=~"$rs_nm"}', // representative metric for a table + expr: 'mongodb_network_bytesIn{%(queriesSelector)s, rs_nm=~"$rs_nm", cl_role="configsvr"}', // representative metric for a table legendCustomTemplate: '', }, }, @@ -45,7 +45,7 @@ function(this) unit: 'none', sources: { prometheus: { - expr: 'mongodb_network_bytesIn{%(queriesSelector)s, rs_nm=~"$rs_nm"}', // representative metric for a table + expr: 'mongodb_network_bytesIn{%(queriesSelector)s, rs_nm=~"$rs_nm", cl_role="mongos"}', // representative metric for a table legendCustomTemplate: '', }, }, @@ -124,7 +124,7 @@ function(this) unit: 'percentunit', sources: { prometheus: { - expr: '(sum without (disk_name) (hardware_disk_metrics_disk_space_used_bytes{%(queriesSelector)s})) / clamp_min((sum without (disk_name) (hardware_disk_metrics_disk_space_used_bytes{%(queriesSelector)s})) + (sum without (disk_name) (hardware_disk_metrics_disk_space_free_bytes{%(queriesSelector)s})), 1)', + expr: '(sum (hardware_disk_metrics_disk_space_used_bytes{%(queriesSelector)s}) by (cl_name)) / clamp_min((sum (hardware_disk_metrics_disk_space_used_bytes{%(queriesSelector)s}) by (cl_name)) + (sum (hardware_disk_metrics_disk_space_free_bytes{%(queriesSelector)s}) by (cl_name)), 1)', legendCustomTemplate: '{{cl_name}}', }, }, @@ -212,12 +212,12 @@ function(this) networkSlowSSL: { name: 'Slow SSL operations', - type: 'counter', + type: 'raw', description: 'Number of slow SSL operations (>1s).', unit: 'ops', sources: { prometheus: { - expr: 'mongodb_network_numSlowSSLOperations{%(queriesSelector)s, rs_nm=~"$rs_nm"}', + expr: 'sum(rate(mongodb_network_numSlowSSLOperations{%(queriesSelector)s, rs_nm=~"$rs_nm"}[$__rate_interval])) by (cl_name)', legendCustomTemplate: '{{cl_name}} - SSL', }, }, @@ -231,7 +231,7 @@ function(this) unit: 'conns', sources: { prometheus: { - expr: 
'sum(rate(mongodb_connections_totalCreated{%(queriesSelector)s}[$__rate_interval])) by (cl_name)', + expr: 'sum(increase(mongodb_connections_totalCreated{%(queriesSelector)s}[$__interval:] offset -$__interval)) by (cl_name)', legendCustomTemplate: '{{cl_name}}', }, }, diff --git a/mongodb-atlas-mixin/signals/elections.libsonnet b/mongodb-atlas-mixin/signals/elections.libsonnet index 6d7a72367..6375d4927 100644 --- a/mongodb-atlas-mixin/signals/elections.libsonnet +++ b/mongodb-atlas-mixin/signals/elections.libsonnet @@ -193,7 +193,8 @@ function(this) averageCatchUpOps: { name: 'Average catch-up operations', type: 'gauge', - aggLevel: 'none', + aggLevel: 'instance', + aggFunction: 'sum', description: 'Average catch-up operations.', unit: 'none', sources: { diff --git a/mongodb-atlas-mixin/signals/operations.libsonnet b/mongodb-atlas-mixin/signals/operations.libsonnet index ac0c2f3f8..c3471f0b1 100644 --- a/mongodb-atlas-mixin/signals/operations.libsonnet +++ b/mongodb-atlas-mixin/signals/operations.libsonnet @@ -4,21 +4,19 @@ function(this) groupLabels: this.groupLabels, instanceLabels: this.instanceLabels, enableLokiLogs: this.enableLokiLogs, - aggLevel: 'group', + aggLevel: 'none', aggFunction: 'sum', signals: { - // Operation counters (cluster-level) opCountersInsert: { name: 'Insert operations', type: 'counter', description: 'Number of insert operations.', - unit: 'none', + unit: 'ops/s', sources: { prometheus: { expr: 'mongodb_opcounters_insert{%(queriesSelector)s, rs_nm=~"$rs_nm"}', - rangeFunction: 'increase', - legendCustomTemplate: '{{cl_name}} - insert', + legendCustomTemplate: '{{instance}}', }, }, }, @@ -27,12 +25,11 @@ function(this) name: 'Query operations', type: 'counter', description: 'Number of query operations.', - unit: 'none', + unit: 'ops/s', sources: { prometheus: { expr: 'mongodb_opcounters_query{%(queriesSelector)s, rs_nm=~"$rs_nm"}', - rangeFunction: 'increase', - legendCustomTemplate: '{{cl_name}} - query', + legendCustomTemplate: 
'{{instance}}', }, }, }, @@ -41,40 +38,10 @@ function(this) name: 'Delete operations', type: 'counter', description: 'Number of delete operations.', - unit: 'none', + unit: 'ops/s', sources: { prometheus: { expr: 'mongodb_opcounters_delete{%(queriesSelector)s, rs_nm=~"$rs_nm"}', - rangeFunction: 'increase', - legendCustomTemplate: '{{cl_name}} - delete', - }, - }, - }, - - // Operation counters (by instance) - opCountersInsertByInstance: { - name: 'Insert operations by instance', - type: 'counter', - aggLevel: 'none', - description: 'Number of insert operations per instance.', - unit: 'ops', - sources: { - prometheus: { - expr: 'mongodb_opcounters_insert{%(queriesSelector)s, rs_nm=~"$rs_nm"}', - legendCustomTemplate: '{{instance}}', - }, - }, - }, - - opCountersQueryByInstance: { - name: 'Query operations by instance', - type: 'counter', - aggLevel: 'none', - description: 'Number of query operations per instance.', - unit: 'ops', - sources: { - prometheus: { - expr: 'mongodb_opcounters_query{%(queriesSelector)s, rs_nm=~"$rs_nm"}', legendCustomTemplate: '{{instance}}', }, }, @@ -83,9 +50,8 @@ function(this) opCountersUpdate: { name: 'Update operations', type: 'counter', - aggLevel: 'none', description: 'Number of update operations.', - unit: 'ops', + unit: 'ops/s', sources: { prometheus: { expr: 'mongodb_opcounters_update{%(queriesSelector)s, rs_nm=~"$rs_nm"}', @@ -94,187 +60,6 @@ function(this) }, }, - opCountersDeleteByInstance: { - name: 'Delete operations by instance', - type: 'counter', - aggLevel: 'none', - description: 'Number of delete operations per instance.', - unit: 'ops', - sources: { - prometheus: { - expr: 'mongodb_opcounters_delete{%(queriesSelector)s, rs_nm=~"$rs_nm"}', - legendCustomTemplate: '{{instance}}', - }, - }, - }, - - // Operation latencies (cluster-level) - opLatenciesReadsOps: { - name: 'Read operation count', - type: 'counter', - description: 'Number of read operations.', - unit: 'ops', - sources: { - prometheus: { - expr: 
'mongodb_opLatencies_reads_ops{%(queriesSelector)s, rs_nm=~"$rs_nm"}', - legendCustomTemplate: '{{cl_name}} - reads', - }, - }, - }, - - opLatenciesWritesOps: { - name: 'Write operation count', - type: 'counter', - description: 'Number of write operations.', - unit: 'ops', - sources: { - prometheus: { - expr: 'mongodb_opLatencies_writes_ops{%(queriesSelector)s, rs_nm=~"$rs_nm"}', - legendCustomTemplate: '{{cl_name}} - writes', - }, - }, - }, - - opLatenciesReadsLatency: { - name: 'Read operation latency', - type: 'counter', - description: 'Total read operation latency.', - unit: 'µs', - sources: { - prometheus: { - expr: 'mongodb_opLatencies_reads_latency{%(queriesSelector)s, rs_nm=~"$rs_nm"}', - rangeFunction: 'increase', - legendCustomTemplate: '{{cl_name}} - reads', - }, - }, - }, - - opLatenciesWritesLatency: { - name: 'Write operation latency', - type: 'counter', - description: 'Total write operation latency.', - unit: 'µs', - sources: { - prometheus: { - expr: 'mongodb_opLatencies_writes_latency{%(queriesSelector)s, rs_nm=~"$rs_nm"}', - rangeFunction: 'increase', - legendCustomTemplate: '{{cl_name}} - writes', - }, - }, - }, - - // Operation latencies (by instance) - opLatenciesReadsOpsByInstance: { - name: 'Read operation count by instance', - type: 'counter', - aggLevel: 'none', - description: 'Number of read operations per instance.', - unit: 'ops', - sources: { - prometheus: { - expr: 'mongodb_opLatencies_reads_ops{%(queriesSelector)s, rs_nm=~"$rs_nm"}', - legendCustomTemplate: '{{instance}} - reads', - }, - }, - }, - - opLatenciesWritesOpsByInstance: { - name: 'Write operation count by instance', - type: 'counter', - aggLevel: 'none', - description: 'Number of write operations per instance.', - unit: 'ops', - sources: { - prometheus: { - expr: 'mongodb_opLatencies_writes_ops{%(queriesSelector)s, rs_nm=~"$rs_nm"}', - legendCustomTemplate: '{{instance}} - writes', - }, - }, - }, - - opLatenciesReadsLatencyByInstance: { - name: 'Read operation latency by 
instance', - type: 'counter', - aggLevel: 'none', - description: 'Total read operation latency per instance.', - unit: 'µs', - sources: { - prometheus: { - expr: 'mongodb_opLatencies_reads_latency{%(queriesSelector)s, rs_nm=~"$rs_nm"}', - rangeFunction: 'increase', - legendCustomTemplate: '{{instance}} - reads', - }, - }, - }, - - opLatenciesWritesLatencyByInstance: { - name: 'Write operation latency by instance', - type: 'counter', - aggLevel: 'none', - description: 'Total write operation latency per instance.', - unit: 'µs', - sources: { - prometheus: { - expr: 'mongodb_opLatencies_writes_latency{%(queriesSelector)s, rs_nm=~"$rs_nm"}', - rangeFunction: 'increase', - legendCustomTemplate: '{{instance}} - writes', - }, - }, - }, - - // Average latency calculations - avgReadLatency: { - name: 'Average read latency', - type: 'raw', - description: 'Average latency per read operation.', - unit: 'µs', - sources: { - prometheus: { - expr: 'sum (increase(mongodb_opLatencies_reads_latency{%(queriesSelector)s, rs_nm=~"$rs_nm"}[$__interval:])) by (job, cl_name) / clamp_min(sum (increase(mongodb_opLatencies_reads_ops{%(queriesSelector)s, rs_nm=~"$rs_nm"}[$__interval:])) by (job, cl_name), 1)', - legendCustomTemplate: '{{cl_name}} - reads', - }, - }, - }, - - avgWriteLatency: { - name: 'Average write latency', - type: 'raw', - description: 'Average latency per write operation.', - unit: 'µs', - sources: { - prometheus: { - expr: 'sum (increase(mongodb_opLatencies_writes_latency{%(queriesSelector)s, rs_nm=~"$rs_nm"}[$__interval:])) by (job, cl_name) / clamp_min(sum (increase(mongodb_opLatencies_writes_ops{%(queriesSelector)s, rs_nm=~"$rs_nm"}[$__interval:])) by (job, cl_name), 1)', - legendCustomTemplate: '{{cl_name}} - writes', - }, - }, - }, - - avgReadLatencyByInstance: { - name: 'Average read latency by instance', - type: 'raw', - description: 'Average latency per read operation by instance.', - unit: 'µs', - sources: { - prometheus: { - expr: 
'increase(mongodb_opLatencies_reads_latency{%(queriesSelector)s, rs_nm=~"$rs_nm"}[$__interval:]) / clamp_min(increase(mongodb_opLatencies_reads_ops{%(queriesSelector)s, rs_nm=~"$rs_nm"}[$__interval:]), 1)', - legendCustomTemplate: '{{instance}} - reads', - }, - }, - }, - - avgWriteLatencyByInstance: { - name: 'Average write latency by instance', - type: 'raw', - description: 'Average latency per write operation by instance.', - unit: 'µs', - sources: { - prometheus: { - expr: 'increase(mongodb_opLatencies_writes_latency{%(queriesSelector)s, rs_nm=~"$rs_nm"}[$__interval:]) / clamp_min(increase(mongodb_opLatencies_writes_ops{%(queriesSelector)s, rs_nm=~"$rs_nm"}[$__interval:]), 1)', - legendCustomTemplate: '{{instance}} - writes', - }, - }, - }, - // Connection signals connectionsCurrent: { name: 'Current connections',