From 4aef7559bb9e23a34608e7de73cf32011709d870 Mon Sep 17 00:00:00 2001 From: schmikei Date: Thu, 6 Nov 2025 14:35:29 -0500 Subject: [PATCH 1/4] modernize the hbase mixin --- .../{alerts => }/alerts.libsonnet | 26 +- apache-hbase-mixin/config.libsonnet | 43 +- apache-hbase-mixin/dashboards.libsonnet | 105 ++ .../apache-hbase-cluster-overview.libsonnet | 1121 -------------- .../apache-hbase-logs-overview.libsonnet | 32 - ...ache-hbase-regionserver-overview.libsonnet | 1300 ----------------- .../dashboards/dashboards.libsonnet | 3 - .../apache-hbase-cluster-overview.json | 876 ++++------- ...s.json => apache-hbase-logs-overview.json} | 73 +- .../apache-hbase-regionserver-overview.json | 1174 +++++---------- apache-hbase-mixin/g.libsonnet | 1 + apache-hbase-mixin/jsonnetfile.json | 17 +- apache-hbase-mixin/links.libsonnet | 28 + apache-hbase-mixin/main.libsonnet | 48 + apache-hbase-mixin/mixin.libsonnet | 35 +- apache-hbase-mixin/panels.libsonnet | 466 ++++++ apache-hbase-mixin/rows.libsonnet | 55 + apache-hbase-mixin/signals/cluster.libsonnet | 316 ++++ .../signals/regionserver.libsonnet | 398 +++++ 19 files changed, 2180 insertions(+), 3937 deletions(-) rename apache-hbase-mixin/{alerts => }/alerts.libsonnet (88%) create mode 100644 apache-hbase-mixin/dashboards.libsonnet delete mode 100644 apache-hbase-mixin/dashboards/apache-hbase-cluster-overview.libsonnet delete mode 100644 apache-hbase-mixin/dashboards/apache-hbase-logs-overview.libsonnet delete mode 100644 apache-hbase-mixin/dashboards/apache-hbase-regionserver-overview.libsonnet delete mode 100644 apache-hbase-mixin/dashboards/dashboards.libsonnet rename apache-hbase-mixin/dashboards_out/{apache-hbase-logs.json => apache-hbase-logs-overview.json} (82%) create mode 100644 apache-hbase-mixin/g.libsonnet create mode 100644 apache-hbase-mixin/links.libsonnet create mode 100644 apache-hbase-mixin/main.libsonnet create mode 100644 apache-hbase-mixin/panels.libsonnet create mode 100644 
apache-hbase-mixin/rows.libsonnet create mode 100644 apache-hbase-mixin/signals/cluster.libsonnet create mode 100644 apache-hbase-mixin/signals/regionserver.libsonnet diff --git a/apache-hbase-mixin/alerts/alerts.libsonnet b/apache-hbase-mixin/alerts.libsonnet similarity index 88% rename from apache-hbase-mixin/alerts/alerts.libsonnet rename to apache-hbase-mixin/alerts.libsonnet index 851a12319..3f3557d06 100644 --- a/apache-hbase-mixin/alerts/alerts.libsonnet +++ b/apache-hbase-mixin/alerts.libsonnet @@ -1,14 +1,14 @@ { - prometheusAlerts+:: { - groups+: [ + new(this): { + groups: [ { name: 'apache-hbase-alerts', rules: [ { alert: 'HBaseHighHeapMemUsage', expr: ||| - 100 * sum without(context, hostname, processname) (jvm_metrics_mem_heap_used_m{%(filterSelector)s} / clamp_min(jvm_metrics_mem_heap_committed_m{%(filterSelector)s}, 1)) > %(alertsHighHeapMemUsage)s - ||| % $._config, + 100 * sum without(context, hostname, processname) (jvm_metrics_mem_heap_used_m{%(filteringSelector)s} / clamp_min(jvm_metrics_mem_heap_committed_m{%(filteringSelector)s}, 1)) > %(alertsHighHeapMemUsage)s + ||| % this.config, 'for': '5m', labels: { severity: 'warning', @@ -18,14 +18,14 @@ description: ( 'The heap memory usage for the JVM on instance {{$labels.instance}} in cluster {{$labels.hbase_cluster}} is {{printf "%%.0f" $value}} percent, which is above the threshold of %(alertsHighHeapMemUsage)s percent' - ) % $._config, + ) % this.config, }, }, { alert: 'HBaseDeadRegionServer', expr: ||| server_num_dead_region_servers > %(alertsDeadRegionServer)s - ||| % $._config, + ||| % this.config, 'for': '5m', labels: { severity: 'warning', @@ -35,14 +35,14 @@ description: ( '{{$value}} RegionServer(s) in cluster {{$labels.hbase_cluster}} are unresponsive, which is above the threshold of %(alertsDeadRegionServer)s. 
The name(s) of the dead RegionServer(s) are {{$labels.deadregionservers}}' - ) % $._config, + ) % this.config, }, }, { alert: 'HBaseOldRegionsInTransition', expr: ||| 100 * assignment_manager_rit_count_over_threshold / clamp_min(assignment_manager_rit_count, 1) > %(alertsOldRegionsInTransition)s - ||| % $._config, + ||| % this.config, 'for': '5m', labels: { severity: 'warning', @@ -52,14 +52,14 @@ description: ( '{{printf "%%.0f" $value}} percent of RegionServers in transition in cluster {{$labels.hbase_cluster}} are transitioning for longer than expected, which is above the threshold of %(alertsOldRegionsInTransition)s percent' - ) % $._config, + ) % this.config, }, }, { alert: 'HBaseHighMasterAuthFailRate', expr: ||| 100 * rate(master_authentication_failures[5m]) / (clamp_min(rate(master_authentication_successes[5m]), 1) + clamp_min(rate(master_authentication_failures[5m]), 1)) > %(alertsHighMasterAuthFailRate)s - ||| % $._config, + ||| % this.config, 'for': '5m', labels: { severity: 'warning', @@ -69,14 +69,14 @@ description: ( '{{printf "%%.0f" $value}} percent of authentication attempts to the master are failing in cluster {{$labels.hbase_cluster}}, which is above the threshold of %(alertsHighMasterAuthFailRate)s percent' - ) % $._config, + ) % this.config, }, }, { alert: 'HBaseHighRSAuthFailRate', expr: ||| 100 * rate(region_server_authentication_failures[5m]) / (clamp_min(rate(region_server_authentication_successes[5m]), 1) + clamp_min(rate(region_server_authentication_failures[5m]), 1)) > %(alertsHighRSAuthFailRate)s - ||| % $._config, + ||| % this.config, 'for': '5m', labels: { severity: 'warning', @@ -86,7 +86,7 @@ description: ( '{{printf "%%.0f" $value}} percent of authentication attempts to the RegionServer {{$labels.instance}} are failing in cluster {{$labels.hbase_cluster}}, which is above the threshold of %(alertsHighRSAuthFailRate)s percent' - ) % $._config, + ) % this.config, }, }, ], diff --git a/apache-hbase-mixin/config.libsonnet 
b/apache-hbase-mixin/config.libsonnet index 62d0416a3..5e6ab839e 100644 --- a/apache-hbase-mixin/config.libsonnet +++ b/apache-hbase-mixin/config.libsonnet @@ -1,20 +1,35 @@ { - _config+:: { - filterSelector: 'job="integrations/apache-hbase"', + local this = self, + filteringSelector: 'job="integrations/apache-hbase"', + groupLabels: ['job', 'hbase_cluster'], + instanceLabels: ['instance'], + logLabels: ['job', 'hbase_cluster', 'instance'], - dashboardTags: ['apache-hbase-mixin'], - dashboardPeriod: 'now-30m', - dashboardTimezone: 'default', - dashboardRefresh: '1m', + dashboardTags: [self.uid + '-mixin'], + uid: 'apache-hbase', + dashboardNamePrefix: 'Apache HBase', + dashboardPeriod: 'now-30m', + dashboardTimezone: 'default', + dashboardRefresh: '1m', + metricsSource: ['prometheus', 'prometheusv2'], - // alerts thresholds - alertsHighHeapMemUsage: 80, // percentage - alertsHighNonHeapMemUsage: 80, // percentage - alertsDeadRegionServer: 0, // count - alertsOldRegionsInTransition: 50, // percentage - alertsHighMasterAuthFailRate: 35, // percentage - alertsHighRSAuthFailRate: 35, // percentage + // Logging configuration + enableLokiLogs: true, + extraLogLabels: ['level'], + logsVolumeGroupBy: 'level', + showLogsVolume: true, - enableLokiLogs: true, + // Alerts thresholds + alertsHighHeapMemUsage: 80, // percentage + alertsHighNonHeapMemUsage: 80, // percentage + alertsDeadRegionServer: 0, // count + alertsOldRegionsInTransition: 50, // percentage + alertsHighMasterAuthFailRate: 35, // percentage + alertsHighRSAuthFailRate: 35, // percentage + + // Signals configuration + signals+: { + cluster: (import './signals/cluster.libsonnet')(this), + regionserver: (import './signals/regionserver.libsonnet')(this), }, } diff --git a/apache-hbase-mixin/dashboards.libsonnet b/apache-hbase-mixin/dashboards.libsonnet new file mode 100644 index 000000000..99d27da6c --- /dev/null +++ b/apache-hbase-mixin/dashboards.libsonnet @@ -0,0 +1,105 @@ +local g = import './g.libsonnet'; 
+local commonlib = import 'common-lib/common/main.libsonnet'; +local logslib = import 'logs-lib/logs/main.libsonnet'; + +{ + local root = self, + new(this):: + local prefix = this.config.dashboardNamePrefix; + local links = this.grafana.links; + local tags = this.config.dashboardTags; + local uid = g.util.string.slugify(this.config.uid); + local vars = this.grafana.variables; + local annotations = this.grafana.annotations; + local refresh = this.config.dashboardRefresh; + local period = this.config.dashboardPeriod; + local timezone = this.config.dashboardTimezone; + { + 'apache-hbase-cluster-overview.json': + g.dashboard.new(prefix + ' cluster overview') + + g.dashboard.withPanels( + g.util.panel.resolveCollapsedFlagOnRows( + g.util.grid.wrapPanels( + [ + this.grafana.rows.clusterOverview, + ] + ) + ) + ) + root.applyCommon( + vars.multiInstance, + uid + '-cluster-overview', + tags, + links { clusterOverview:: {} }, + annotations, + timezone, + refresh, + period, + ), + + 'apache-hbase-regionserver-overview.json': + g.dashboard.new(prefix + ' RegionServer overview') + + g.dashboard.withPanels( + g.util.panel.resolveCollapsedFlagOnRows( + g.util.grid.wrapPanels( + [ + this.grafana.rows.regionServerOverview, + ] + ) + ) + ) + root.applyCommon( + vars.multiInstance, + uid + '-regionserver-overview', + tags, + links { regionServerOverview:: {} }, + annotations, + timezone, + refresh, + period, + ), + } + + + if this.config.enableLokiLogs then + { + 'apache-hbase-logs.json': + logslib.new( + prefix + ' logs', + datasourceName=this.grafana.variables.datasources.loki.name, + datasourceRegex=this.grafana.variables.datasources.loki.regex, + filterSelector=this.config.filteringSelector, + labels=this.config.logLabels + this.config.extraLogLabels, + formatParser=null, + showLogsVolume=this.config.showLogsVolume, + logsVolumeGroupBy=this.config.logsVolumeGroupBy, + ) + { + dashboards+: + { + logs+: + root.applyCommon(super.logs.templating.list, uid=uid + '_logs', tags=tags, 
links=links { logs+:: {} }, annotations=annotations, timezone=timezone, refresh=refresh, period=period), + }, + panels+: + { + logs+: + g.panel.logs.options.withEnableLogDetails(true) + + g.panel.logs.options.withShowTime(false) + + g.panel.logs.options.withWrapLogMessage(false), + }, + variables+: { + toArray+: [ + this.grafana.variables.datasources.prometheus { hide: 2 }, + ], + }, + }.dashboards.logs, + } + else {}, + + applyCommon(vars, uid, tags, links, annotations, timezone, refresh, period): + g.dashboard.withTags(tags) + + g.dashboard.withUid(uid) + + g.dashboard.withLinks(std.objectValues(links)) + + g.dashboard.withTimezone(timezone) + + g.dashboard.withRefresh(refresh) + + g.dashboard.time.withFrom(period) + + g.dashboard.withVariables(vars) + + g.dashboard.withAnnotations(std.objectValues(annotations)), +} diff --git a/apache-hbase-mixin/dashboards/apache-hbase-cluster-overview.libsonnet b/apache-hbase-mixin/dashboards/apache-hbase-cluster-overview.libsonnet deleted file mode 100644 index 24f05644b..000000000 --- a/apache-hbase-mixin/dashboards/apache-hbase-cluster-overview.libsonnet +++ /dev/null @@ -1,1121 +0,0 @@ -local g = (import 'grafana-builder/grafana.libsonnet'); -local grafana = (import 'grafonnet/grafana.libsonnet'); -local dashboard = grafana.dashboard; -local template = grafana.template; -local prometheus = grafana.prometheus; - -local dashboardUid = 'apache-hbase-cluster-overview'; - -local promDatasourceName = 'prometheus_datasource'; - -local promDatasource = { - uid: '${%s}' % promDatasourceName, -}; - -local masterStatusHistoryPanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'max without (clusterid,deadregionservers,liveregionservers,servername,zookeeperquorum,isactivemaster) (server_num_region_servers{job=~"$job", hbase_cluster=~"$hbase_cluster", isactivemaster="true"} * 0 + 1 )', - datasource=promDatasource, - legendFormat='{{instance}}', - ), - prometheus.target( - '(max without 
(clusterid,deadregionservers,liveregionservers,servername,zookeeperquorum,isactivemaster) (server_num_region_servers{job=~"$job", hbase_cluster=~"$hbase_cluster", isactivemaster="false"}) * 0)', - datasource=promDatasource, - legendFormat='{{instance}}', - ), - ], - type: 'status-history', - title: 'Master status history', - description: 'Displays the current active and backup masters.', - fieldConfig: { - defaults: { - color: { - mode: 'thresholds', - }, - custom: { - fillOpacity: 70, - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - lineWidth: 1, - }, - mappings: [ - { - options: { - '0': { - color: 'yellow', - index: 1, - text: 'Backup', - }, - '1': { - color: 'blue', - index: 0, - text: 'Active', - }, - }, - type: 'value', - }, - ], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - }, - ], - }, - }, - overrides: [], - }, - maxDataPoints: 100, - options: { - colWidth: 0.9, - legend: { - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - rowHeight: 0.9, - showValue: 'never', - tooltip: { - mode: 'multi', - sort: 'none', - }, - }, - transformations: [], -}; - -local liveRegionServersPanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'server_num_region_servers{job=~"$job", hbase_cluster=~"$hbase_cluster", isactivemaster="true"}', - datasource=promDatasource, - legendFormat='{{hbase_cluster}}', - format='time_series', - ), - ], - type: 'stat', - title: 'Live RegionServers', - description: 'Number of RegionServers that are currently live.', - fieldConfig: { - defaults: { - color: { - mode: 'thresholds', - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - }, - ], - }, - }, - overrides: [], - }, - options: { - colorMode: 'value', - graphMode: 'none', - justifyMode: 'auto', - orientation: 'auto', - reduceOptions: { - calcs: [ - 'lastNotNull', - ], - fields: '', - values: false, - }, - textMode: 'value', - }, - pluginVersion: '10.3.0-62488', -}; - 
-local deadRegionServersPanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'server_num_dead_region_servers{job=~"$job", hbase_cluster=~"$hbase_cluster", isactivemaster="true"}', - datasource=promDatasource, - legendFormat='{{hbase_cluster}}', - format='time_series', - ), - ], - type: 'stat', - title: 'Dead RegionServers', - description: 'Number of RegionServers that are currently dead.', - fieldConfig: { - defaults: { - color: { - mode: 'thresholds', - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - }, - { - color: 'red', - value: 1, - }, - ], - }, - }, - overrides: [], - }, - options: { - colorMode: 'value', - graphMode: 'none', - justifyMode: 'auto', - orientation: 'auto', - reduceOptions: { - calcs: [ - 'lastNotNull', - ], - fields: '', - values: false, - }, - textMode: 'value', - }, - pluginVersion: '10.3.0-62488', -}; - -local serversPanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'label_replace(server_num_region_servers{job=~"$job", hbase_cluster=~"$hbase_cluster"}, "master_instance", "$1", "instance", "(.+)")', - datasource=promDatasource, - legendFormat='{{hbase_cluster}}', - format='table', - instant=true, - ), - prometheus.target( - 'label_replace(server_num_reference_files{job=~"$job", hbase_cluster=~"$hbase_cluster"}, "region_server_instance", "$1", "instance", "(.+)")', - datasource=promDatasource, - legendFormat='{{hbase_cluster}}', - format='table', - instant=true, - ), - ], - type: 'table', - title: 'Servers', - description: 'Servers for a cluster.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - align: 'left', - cellOptions: { - type: 'auto', - }, - inspect: false, - }, - links: [], - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - }, - ], - }, - }, - overrides: [ - { - matcher: { - id: 'byName', - options: 'RegionServer', - }, - properties: [ - { - id: 'links', - value: [ - { - 
title: '', - url: '/d/apache-hbase-regionserver-overview?from=${__from}&to=${__to}&var-instance=${__data.fields["RegionServer"]}', - }, - ], - }, - { - id: 'mappings', - value: [], - }, - ], - }, - { - matcher: { - id: 'byName', - options: 'Role', - }, - properties: [ - { - id: 'noValue', - value: 'RegionServer', - }, - { - id: 'mappings', - value: [ - { - options: { - 'false': { - index: 1, - text: 'backup master', - }, - 'true': { - color: 'text', - index: 0, - text: 'active master', - }, - }, - type: 'value', - }, - ], - }, - ], - }, - ], - }, - options: { - cellHeight: 'md', - footer: { - countRows: false, - fields: '', - reducer: [ - 'sum', - ], - show: false, - }, - showHeader: true, - sortBy: [], - }, - pluginVersion: '10.3.0-62488', - transformations: [ - { - id: 'merge', - options: {}, - }, - { - id: 'organize', - options: { - excludeByName: { - Time: true, - 'Time 1': true, - 'Time 2': true, - Value: true, - 'Value #A': true, - 'Value #B': true, - __name__: true, - '__name__ 1': true, - '__name__ 2': true, - clusterid: true, - 'clusterid 1': true, - 'clusterid 2': true, - context: true, - 'context 1': true, - 'context 2': true, - hbase_cluster: false, - 'hbase_cluster 1': true, - 'hbase_cluster 2': true, - instance: true, - 'instance 1': false, - 'instance 2': true, - isactivemaster: false, - 'isactivemaster 1': false, - 'isactivemaster 2': true, - job: true, - 'job 1': true, - 'job 2': true, - liveregionservers: true, - 'liveregionservers 1': true, - 'liveregionservers 2': true, - servername: false, - 'servername 1': false, - 'servername 2': true, - zookeeperquorum: true, - 'zookeeperquorum 1': true, - 'zookeeperquorum 2': true, - }, - indexByName: { - Time: 5, - 'Value #A': 12, - 'Value #B': 13, - __name__: 6, - clusterid: 7, - context: 8, - hbase_cluster: 4, - hostname: 1, - instance: 2, - isactivemaster: 3, - job: 9, - liveregionservers: 10, - servername: 0, - zookeeperquorum: 11, - }, - renameByName: { - Time: '', - deadregionservers: 'Dead server', 
- hbase_cluster: 'Cluster', - hostname: 'Hostname', - instance: 'Instance', - 'instance 1': '', - isactivemaster: 'Role', - 'isactivemaster 1': 'Master', - master_instance: 'Master', - region_server_instance: 'RegionServer', - servername: 'Servername', - 'servername 1': 'Servername', - }, - }, - }, - ], -}; - -local alertsPanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'master_num_open_connections{job=~"$job", hbase_cluster=~"$hbase_cluster"}', - datasource=promDatasource, - legendFormat='{{hbase_cluster}}', - format='time_series', - ), - ], - type: 'alertlist', - title: 'Alerts', - description: 'Panel to report on the status of integration alerts.', - options: { - alertInstanceLabelFilter: '{job=~"${job:regex}", hbase_cluster=~"${hbase_cluster:regex}"}', - alertName: '', - dashboardAlerts: false, - folder: '', - groupBy: [], - groupMode: 'default', - maxItems: 20, - sortOrder: 1, - stateFilter: { - 'error': true, - firing: true, - noData: true, - normal: true, - pending: true, - }, - viewMode: 'list', - }, -}; - -local jvmHeapMemoryUsagePanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'jvm_metrics_mem_heap_used_m{job=~"$job", hbase_cluster=~"$hbase_cluster", processname=~"Master"} / clamp_min(jvm_metrics_mem_heap_committed_m{job=~"$job", hbase_cluster=~"$hbase_cluster", processname=~"Master"}, 1)', - datasource=promDatasource, - legendFormat='{{hbase_cluster}} - {{instance}}', - format='time_series', - ), - ], - type: 'timeseries', - title: 'JVM heap memory usage', - description: 'Heap memory usage for the JVM.', - fieldConfig: { - defaults: { - color: { - mode: 'continuous-BlYlRd', - }, - custom: { - axisBorderShow: false, - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 30, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - insertNulls: false, - lineInterpolation: 
'smooth', - lineWidth: 2, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'never', - spanNulls: false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - max: 1, - min: 0, - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - }, - { - color: 'red', - value: 80, - }, - ], - }, - unit: 'percentunit', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'multi', - sort: 'desc', - }, - }, -}; - -local connectionsPanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'sum by(job, hbase_cluster) (master_num_open_connections{job=~"$job", hbase_cluster=~"$hbase_cluster"})', - datasource=promDatasource, - legendFormat='{{hbase_cluster}} - masters', - format='time_series', - ), - prometheus.target( - 'sum by(job, hbase_cluster) (region_server_num_open_connections{job=~"$job", hbase_cluster=~"$hbase_cluster"})', - datasource=promDatasource, - legendFormat='{{hbase_cluster}} - RegionServers', - format='time_series', - ), - ], - type: 'timeseries', - title: 'Connections', - description: 'Number of open connections to the cluster.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisBorderShow: false, - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 30, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - insertNulls: false, - lineInterpolation: 'smooth', - lineWidth: 2, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'never', - spanNulls: false, - stacking: { - group: 'A', - mode: 'normal', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - decimals: 0, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - }, 
- ], - }, - unit: 'none', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'multi', - sort: 'desc', - }, - }, -}; - -local authenticationsPanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'sum by(job, hbase_cluster) (rate(master_authentication_successes{job=~"$job", hbase_cluster=~"$hbase_cluster"}[$__rate_interval]))', - datasource=promDatasource, - legendFormat='{{hbase_cluster}} - masters success', - format='time_series', - ), - prometheus.target( - 'sum by(job, hbase_cluster) (rate(master_authentication_failures{job=~"$job", hbase_cluster=~"$hbase_cluster"}[$__rate_interval]))', - datasource=promDatasource, - legendFormat='{{hbase_cluster}} - masters failure', - format='time_series', - ), - prometheus.target( - 'sum by(job, hbase_cluster) (rate(region_server_authentication_successes{job=~"$job", hbase_cluster=~"$hbase_cluster"}[$__rate_interval]))', - datasource=promDatasource, - legendFormat='{{hbase_cluster}} - rs success', - format='time_series', - ), - prometheus.target( - 'sum by(job, hbase_cluster) (rate(region_server_authentication_failures{job=~"$job", hbase_cluster=~"$hbase_cluster"}[$__rate_interval]))', - datasource=promDatasource, - legendFormat='{{hbase_cluster}} - rs failure', - format='time_series', - ), - ], - type: 'timeseries', - title: 'Authentications', - description: 'Volume of successful and unsuccessful authentications.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisBorderShow: false, - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 30, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - insertNulls: false, - lineInterpolation: 'smooth', - lineWidth: 2, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - 
showPoints: 'never', - spanNulls: false, - stacking: { - group: 'A', - mode: 'normal', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - }, - { - color: 'red', - value: 80, - }, - ], - }, - unit: 'none', - }, - overrides: [], - }, - options: { - legend: { - calcs: [ - 'min', - 'mean', - 'max', - ], - displayMode: 'table', - placement: 'right', - showLegend: true, - }, - tooltip: { - mode: 'multi', - sort: 'desc', - }, - }, -}; - -local masterQueueSizePanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'master_queue_size{job=~"$job", hbase_cluster=~"$hbase_cluster"}', - datasource=promDatasource, - legendFormat='{{hbase_cluster}} - {{instance}}', - format='time_series', - ), - ], - type: 'timeseries', - title: 'Master queue size', - description: 'The size of the queue of requests, operations, and tasks to be processed by the master.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisBorderShow: false, - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 30, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - insertNulls: false, - lineInterpolation: 'smooth', - lineWidth: 2, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'never', - spanNulls: false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - }, - { - color: 'red', - value: 80, - }, - ], - }, - unit: 'decbytes', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'multi', - sort: 'desc', - }, - }, -}; - -local masterQueuedCallsPanel = { - datasource: 
promDatasource, - targets: [ - prometheus.target( - 'sum by(job, hbase_cluster) (master_num_calls_in_general_queue{job=~"$job", hbase_cluster=~"$hbase_cluster"})', - datasource=promDatasource, - legendFormat='{{hbase_cluster}} - general', - format='time_series', - ), - prometheus.target( - 'sum by(job, hbase_cluster) (master_num_calls_in_replication_queue{job=~"$job", hbase_cluster=~"$hbase_cluster"})', - datasource=promDatasource, - legendFormat='{{hbase_cluster}} - replication', - format='time_series', - ), - prometheus.target( - 'sum by(job, hbase_cluster) (master_num_calls_in_read_queue{job=~"$job", hbase_cluster=~"$hbase_cluster"})', - datasource=promDatasource, - legendFormat='{{hbase_cluster}} - read', - format='time_series', - ), - prometheus.target( - 'sum by(job, hbase_cluster) (master_num_calls_in_write_queue{job=~"$job", hbase_cluster=~"$hbase_cluster"})', - datasource=promDatasource, - legendFormat='{{hbase_cluster}} - write', - format='time_series', - ), - prometheus.target( - 'sum by(job, hbase_cluster) (master_num_calls_in_scan_queue{job=~"$job", hbase_cluster=~"$hbase_cluster"})', - datasource=promDatasource, - legendFormat='{{hbase_cluster}} - scan', - format='time_series', - ), - prometheus.target( - 'sum by(job, hbase_cluster) (master_num_calls_in_priority_queue{job=~"$job", hbase_cluster=~"$hbase_cluster"})', - datasource=promDatasource, - legendFormat='{{hbase_cluster}} - priority', - format='time_series', - ), - ], - type: 'timeseries', - title: 'Master queued calls', - description: 'The number of calls waiting to be processed by the master.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisBorderShow: false, - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 30, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - insertNulls: false, - lineInterpolation: 
'smooth', - lineWidth: 2, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'never', - spanNulls: false, - stacking: { - group: 'A', - mode: 'normal', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - }, - { - color: 'red', - value: 80, - }, - ], - }, - unit: 'none', - }, - overrides: [], - }, - options: { - legend: { - calcs: [ - 'min', - 'mean', - 'max', - ], - displayMode: 'table', - placement: 'right', - showLegend: true, - }, - tooltip: { - mode: 'multi', - sort: 'desc', - }, - }, -}; - -local regionsInTransitionPanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'assignment_manager_rit_count{job=~"$job", hbase_cluster=~"$hbase_cluster"}', - datasource=promDatasource, - legendFormat='{{hbase_cluster}}', - format='time_series', - ), - prometheus.target( - 'assignment_manager_rit_count_over_threshold{job=~"$job", hbase_cluster=~"$hbase_cluster"}', - datasource=promDatasource, - legendFormat='{{hbase_cluster}} - old', - format='time_series', - ), - ], - type: 'timeseries', - title: 'Regions in transition', - description: 'The number of regions in transition for the cluster.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisBorderShow: false, - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 30, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - insertNulls: false, - lineInterpolation: 'smooth', - lineWidth: 2, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'never', - spanNulls: false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - decimals: 0, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - }, - { - color: 'red', - value: 80, - 
}, - ], - }, - unit: 'none', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'multi', - sort: 'desc', - }, - }, -}; - -local oldestRegionInTransitionPanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'assignment_manager_rit_oldest_age{job=~"$job", hbase_cluster=~"$hbase_cluster"}', - datasource=promDatasource, - legendFormat='{{hbase_cluster}}', - format='time_series', - ), - ], - type: 'timeseries', - title: 'Oldest region in transition', - description: 'The age of the longest region in transition for the master of the cluster.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisBorderShow: false, - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 30, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - insertNulls: false, - lineInterpolation: 'smooth', - lineWidth: 2, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'never', - spanNulls: false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - }, - { - color: 'red', - value: 80, - }, - ], - }, - unit: 'ms', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'multi', - sort: 'desc', - }, - }, -}; - -{ - grafanaDashboards+:: { - 'apache-hbase-cluster-overview.json': - dashboard.new( - 'Apache HBase cluster overview', - time_from='%s' % $._config.dashboardPeriod, - tags=($._config.dashboardTags), - timezone='%s' % $._config.dashboardTimezone, - refresh='%s' % $._config.dashboardRefresh, - description='', - uid=dashboardUid, - ) - 
.addLink(grafana.link.dashboards( - asDropdown=false, - title='Other Apache HBase Dashboards', - includeVars=false, - keepTime=true, - tags=($._config.dashboardTags), - )) - .addTemplates( - [ - template.datasource( - promDatasourceName, - 'prometheus', - null, - label='Data Source', - refresh='load' - ), - template.new( - 'job', - promDatasource, - 'label_values(master_num_open_connections,job)', - label='Job', - refresh=2, - includeAll=true, - multi=true, - allValues='', - sort=0 - ), - template.new( - 'hbase_cluster', - promDatasource, - 'label_values(master_num_open_connections{job=~"$job"},hbase_cluster)', - label='HBase cluster', - refresh=2, - includeAll=true, - multi=true, - allValues='', - sort=0 - ), - ] - ) - .addPanels( - [ - masterStatusHistoryPanel { gridPos: { h: 6, w: 24, x: 0, y: 0 } }, - liveRegionServersPanel { gridPos: { h: 8, w: 5, x: 0, y: 6 } }, - deadRegionServersPanel { gridPos: { h: 8, w: 5, x: 5, y: 6 } }, - serversPanel { gridPos: { h: 8, w: 14, x: 10, y: 6 } }, - alertsPanel { gridPos: { h: 8, w: 12, x: 0, y: 14 } }, - jvmHeapMemoryUsagePanel { gridPos: { h: 8, w: 12, x: 12, y: 14 } }, - connectionsPanel { gridPos: { h: 8, w: 12, x: 0, y: 22 } }, - authenticationsPanel { gridPos: { h: 8, w: 12, x: 12, y: 22 } }, - masterQueueSizePanel { gridPos: { h: 8, w: 12, x: 0, y: 30 } }, - masterQueuedCallsPanel { gridPos: { h: 8, w: 12, x: 12, y: 30 } }, - regionsInTransitionPanel { gridPos: { h: 8, w: 12, x: 0, y: 38 } }, - oldestRegionInTransitionPanel { gridPos: { h: 8, w: 12, x: 12, y: 38 } }, - ] - ), - }, -} diff --git a/apache-hbase-mixin/dashboards/apache-hbase-logs-overview.libsonnet b/apache-hbase-mixin/dashboards/apache-hbase-logs-overview.libsonnet deleted file mode 100644 index d8de41929..000000000 --- a/apache-hbase-mixin/dashboards/apache-hbase-logs-overview.libsonnet +++ /dev/null @@ -1,32 +0,0 @@ -local g = import 'github.com/grafana/grafonnet/gen/grafonnet-latest/main.libsonnet'; -local logsDashboard = import 
'github.com/grafana/jsonnet-libs/logs-lib/logs/main.libsonnet'; -{ - grafanaDashboards+:: - if $._config.enableLokiLogs then { - local apacheHBaseLogs = - logsDashboard.new( - 'Apache HBase logs overview', - datasourceName='loki_datasource', - datasourceRegex='', - filterSelector=$._config.filterSelector, - labels=['job', 'hbase_cluster', 'instance', 'logger', 'level'], - formatParser=null, - showLogsVolume=true - ) - { - panels+: - { - logs+: - // Apache HBase logs already have timestamp - g.panel.logs.options.withShowTime(false), - }, - dashboards+: - { - logs+: g.dashboard.withLinksMixin($.grafanaDashboards['apache-hbase-cluster-overview.json'].links) - + g.dashboard.withTags($._config.dashboardTags) - + g.dashboard.withRefresh($._config.dashboardRefresh), - }, - }, - 'apache-hbase-logs.json': apacheHBaseLogs.dashboards.logs, - } else {}, -} diff --git a/apache-hbase-mixin/dashboards/apache-hbase-regionserver-overview.libsonnet b/apache-hbase-mixin/dashboards/apache-hbase-regionserver-overview.libsonnet deleted file mode 100644 index ed93120f7..000000000 --- a/apache-hbase-mixin/dashboards/apache-hbase-regionserver-overview.libsonnet +++ /dev/null @@ -1,1300 +0,0 @@ -local g = (import 'grafana-builder/grafana.libsonnet'); -local grafana = (import 'grafonnet/grafana.libsonnet'); -local dashboard = grafana.dashboard; -local template = grafana.template; -local prometheus = grafana.prometheus; - -local dashboardUid = 'apache-hbase-regionserver-overview'; - -local promDatasourceName = 'prometheus_datasource'; - -local promDatasource = { - uid: '${%s}' % promDatasourceName, -}; - -local regionsPanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'sum by(job, hbase_cluster) (server_region_count{job=~"$job", hbase_cluster=~"$hbase_cluster", instance=~"$instance"})', - datasource=promDatasource, - legendFormat='{{instance}}', - format='time_series', - ), - ], - type: 'stat', - title: 'Regions', - description: 'The number of regions hosted by the 
Region Server.', - fieldConfig: { - defaults: { - color: { - mode: 'thresholds', - }, - decimals: 0, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - { - color: 'red', - value: 80, - }, - ], - }, - unit: 'none', - }, - overrides: [], - }, - options: { - colorMode: 'value', - graphMode: 'none', - justifyMode: 'auto', - orientation: 'auto', - reduceOptions: { - calcs: [ - 'lastNotNull', - ], - fields: '', - values: false, - }, - textMode: 'auto', - }, - pluginVersion: '10.3.0-62488', -}; - -local storeFilesPanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'sum by(job, hbase_cluster) (server_store_file_count{job=~"$job", hbase_cluster=~"$hbase_cluster", instance=~"$instance"})', - datasource=promDatasource, - legendFormat='{{instance}}', - format='time_series', - ), - ], - type: 'stat', - title: 'Store files', - description: 'The number of store files on disk currently managed by the Region Server.', - fieldConfig: { - defaults: { - color: { - mode: 'thresholds', - }, - decimals: 0, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - { - color: 'red', - value: 80, - }, - ], - }, - unit: 'none', - }, - overrides: [], - }, - options: { - colorMode: 'value', - graphMode: 'none', - justifyMode: 'auto', - orientation: 'auto', - reduceOptions: { - calcs: [ - 'lastNotNull', - ], - fields: '', - values: false, - }, - textMode: 'auto', - }, - pluginVersion: '10.3.0-62488', -}; - -local storeFileSizePanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'sum by(job, hbase_cluster) (server_store_file_size{job=~"$job", hbase_cluster=~"$hbase_cluster", instance=~"$instance"})', - datasource=promDatasource, - legendFormat='{{instance}}', - format='time_series', - ), - ], - type: 'bargauge', - title: 'Store file size', - description: 'The total size of the store files on disk managed by the Region Server.', - fieldConfig: { - 
defaults: { - color: { - mode: 'thresholds', - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - ], - }, - unit: 'decbytes', - }, - overrides: [], - }, - options: { - displayMode: 'gradient', - minVizHeight: 10, - minVizWidth: 0, - namePlacement: 'auto', - orientation: 'vertical', - reduceOptions: { - calcs: [ - 'lastNotNull', - ], - fields: '', - values: false, - }, - showUnfilled: true, - valueMode: 'color', - }, - pluginVersion: '10.3.0-62488', -}; - -local rpcConnectionsPanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'sum by(job, hbase_cluster) (region_server_num_open_connections{job=~"$job", hbase_cluster=~"$hbase_cluster", instance=~"$instance"})', - datasource=promDatasource, - legendFormat='{{instance}}', - format='time_series', - ), - ], - type: 'stat', - title: 'RPC connections', - description: 'The number of open connections to the Region Server.', - fieldConfig: { - defaults: { - color: { - mode: 'thresholds', - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - ], - }, - unit: 'none', - }, - overrides: [], - }, - options: { - colorMode: 'value', - graphMode: 'none', - justifyMode: 'auto', - orientation: 'auto', - reduceOptions: { - calcs: [ - 'lastNotNull', - ], - fields: '', - values: false, - }, - textMode: 'auto', - }, - pluginVersion: '10.3.0-62488', -}; - -local jvmHeapMemoryUsagePanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'jvm_metrics_mem_heap_used_m{job=~"$job", hbase_cluster=~"$hbase_cluster", instance=~"$instance", processname="RegionServer"} / clamp_min(jvm_metrics_mem_heap_committed_m{job=~"$job", hbase_cluster=~"$hbase_cluster", instance=~"$instance", processname="RegionServer"}, 1)', - datasource=promDatasource, - legendFormat='{{instance}}', - format='time_series', - ), - ], - type: 'timeseries', - title: 'JVM heap memory usage', - description: 'Heap memory usage 
for the JVM.', - fieldConfig: { - defaults: { - color: { - mode: 'continuous-BlYlRd', - }, - custom: { - axisBorderShow: false, - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 30, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - insertNulls: false, - lineInterpolation: 'smooth', - lineWidth: 2, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'never', - spanNulls: false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - max: 1, - min: 0, - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - { - color: 'red', - value: 80, - }, - ], - }, - unit: 'percentunit', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'multi', - sort: 'desc', - }, - }, -}; - -local requestsReceivedPanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'rate(server_total_request_count{job=~"$job", hbase_cluster=~"$hbase_cluster", instance=~"$instance"}[$__rate_interval])', - datasource=promDatasource, - legendFormat='{{instance}}', - format='time_series', - ), - ], - type: 'timeseries', - title: 'Requests received', - description: 'The rate of requests received by the Region Server.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisBorderShow: false, - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 30, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - insertNulls: false, - lineInterpolation: 'smooth', - lineWidth: 2, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'never', - spanNulls: 
false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - { - color: 'red', - value: 80, - }, - ], - }, - unit: 'reqps', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'multi', - sort: 'desc', - }, - }, -}; - -local requestsOverviewPanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'sum by(job, hbase_cluster) (rate(server_read_request_count{job=~"$job", hbase_cluster=~"$hbase_cluster", instance=~"$instance"}[$__rate_interval]))', - datasource=promDatasource, - legendFormat='read', - format='time_series', - ), - prometheus.target( - 'sum by(job, hbase_cluster) (rate(server_write_request_count{job=~"$job", hbase_cluster=~"$hbase_cluster", instance=~"$instance"}[$__rate_interval]))', - datasource=promDatasource, - legendFormat='write', - format='time_series', - ), - prometheus.target( - 'sum by(job, hbase_cluster) (rate(server_cp_request_count{job=~"$job", hbase_cluster=~"$hbase_cluster", instance=~"$instance"}[$__rate_interval]))', - datasource=promDatasource, - legendFormat='copy', - format='time_series', - ), - prometheus.target( - 'sum by(job, hbase_cluster) (rate(server_filtered_read_request_count{job=~"$job", hbase_cluster=~"$hbase_cluster", instance=~"$instance"}[$__rate_interval]))', - datasource=promDatasource, - legendFormat='filtered read', - format='time_series', - ), - prometheus.target( - 'sum by(job, hbase_cluster) (rate(server_rpc_get_request_count{job=~"$job", hbase_cluster=~"$hbase_cluster", instance=~"$instance"}[$__rate_interval]))', - datasource=promDatasource, - legendFormat='rpc get', - format='time_series', - ), - prometheus.target( - 'sum by(job, hbase_cluster) (rate(server_rpc_scan_request_count{job=~"$job", hbase_cluster=~"$hbase_cluster", 
instance=~"$instance"}[$__rate_interval]))', - datasource=promDatasource, - legendFormat='rpc scan', - format='time_series', - ), - prometheus.target( - 'sum by(job, hbase_cluster) (rate(server_rpc_full_scan_request_count{job=~"$job", hbase_cluster=~"$hbase_cluster", instance=~"$instance"}[$__rate_interval]))', - datasource=promDatasource, - legendFormat='rpc full scan', - format='time_series', - ), - prometheus.target( - 'sum by(job, hbase_cluster) (rate(server_rpc_mutate_request_count{job=~"$job", hbase_cluster=~"$hbase_cluster", instance=~"$instance"}[$__rate_interval]))', - datasource=promDatasource, - legendFormat='rpc mutate', - format='time_series', - ), - prometheus.target( - 'sum by(job, hbase_cluster) (rate(server_rpc_multi_request_count{job=~"$job", hbase_cluster=~"$hbase_cluster", instance=~"$instance"}[$__rate_interval]))', - datasource=promDatasource, - legendFormat='rpc multi', - format='time_series', - ), - ], - type: 'piechart', - title: 'Requests overview', - description: 'Requests received by the Region Server, broken down by type.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - }, - mappings: [], - unit: 'reqps', - }, - overrides: [], - }, - options: { - legend: { - displayMode: 'list', - placement: 'right', - showLegend: true, - }, - pieType: 'pie', - reduceOptions: { - calcs: [ - 'lastNotNull', - ], - fields: '', - values: false, - }, - tooltip: { - mode: 'multi', - sort: 'desc', - }, - }, -}; - -local regionCountPanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'server_region_count{job=~"$job", hbase_cluster=~"$hbase_cluster", instance=~"$instance"}', - datasource=promDatasource, - legendFormat='{{instance}}', - format='time_series', - ), - ], - type: 'timeseries', - title: 'Region count', - description: 'The number of regions hosted by the Region Server.', - fieldConfig: { - defaults: { - color: { - mode: 
'palette-classic', - }, - custom: { - axisBorderShow: false, - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 30, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - insertNulls: false, - lineInterpolation: 'smooth', - lineWidth: 2, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'never', - spanNulls: false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - decimals: 0, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - { - color: 'red', - value: 80, - }, - ], - }, - unit: 'none', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'multi', - sort: 'desc', - }, - }, -}; - -local rpcConnectionCountPanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'region_server_num_open_connections{job=~"$job", hbase_cluster=~"$hbase_cluster", instance=~"$instance"}', - datasource=promDatasource, - legendFormat='{{instance}}', - format='time_series', - ), - ], - type: 'timeseries', - title: 'RPC connection count', - description: 'The number of open connections to the Region Server.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisBorderShow: false, - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 30, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - insertNulls: false, - lineInterpolation: 'smooth', - lineWidth: 2, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'never', - spanNulls: false, - stacking: { - group: 'A', - mode: 'normal', - }, - thresholdsStyle: { - mode: 
'off', - }, - }, - decimals: 0, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - ], - }, - unit: 'none', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'multi', - sort: 'desc', - }, - }, -}; - -local storeFileCountPanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'server_store_file_count{job=~"$job", hbase_cluster=~"$hbase_cluster", instance=~"$instance"}', - datasource=promDatasource, - legendFormat='{{instance}}', - format='time_series', - ), - ], - type: 'timeseries', - title: 'Store file count', - description: 'The number of store files on disk currently managed by the Region Server.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisBorderShow: false, - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 30, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - insertNulls: false, - lineInterpolation: 'smooth', - lineWidth: 2, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'never', - spanNulls: false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - decimals: 0, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - { - color: 'red', - value: 80, - }, - ], - }, - unit: 'none', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'multi', - sort: 'desc', - }, - }, -}; - -local storeFileSizeTimeseriesPanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'server_store_file_size{job=~"$job", hbase_cluster=~"$hbase_cluster", instance=~"$instance"}', 
- datasource=promDatasource, - legendFormat='{{instance}}', - format='time_series', - ), - ], - type: 'timeseries', - title: 'Store file size', - description: 'The total size of the store files on disk managed by the Region Server.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisBorderShow: false, - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 30, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - insertNulls: false, - lineInterpolation: 'smooth', - lineWidth: 2, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'never', - spanNulls: false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - { - color: 'red', - value: 80, - }, - ], - }, - unit: 'decbytes', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'multi', - sort: 'desc', - }, - }, -}; - -local queuedCallsPanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'sum by(job, hbase_cluster) (region_server_num_calls_in_general_queue{job=~"$job", hbase_cluster=~"$hbase_cluster", instance=~"$instance"})', - datasource=promDatasource, - legendFormat='general', - format='time_series', - ), - prometheus.target( - 'sum by(job, hbase_cluster) (region_server_num_calls_in_replication_queue{job=~"$job", hbase_cluster=~"$hbase_cluster", instance=~"$instance"})', - datasource=promDatasource, - legendFormat='replication', - format='time_series', - ), - prometheus.target( - 'sum by(job, hbase_cluster) (region_server_num_calls_in_read_queue{job=~"$job", hbase_cluster=~"$hbase_cluster", instance=~"$instance"})', - datasource=promDatasource, - 
legendFormat='read', - format='time_series', - ), - prometheus.target( - 'sum by(job, hbase_cluster) (region_server_num_calls_in_write_queue{job=~"$job", hbase_cluster=~"$hbase_cluster", instance=~"$instance"})', - datasource=promDatasource, - legendFormat='write', - format='time_series', - ), - prometheus.target( - 'sum by(job, hbase_cluster) (region_server_num_calls_in_scan_queue{job=~"$job", hbase_cluster=~"$hbase_cluster", instance=~"$instance"})', - datasource=promDatasource, - legendFormat='scan', - format='time_series', - ), - prometheus.target( - 'sum by(job, hbase_cluster) (region_server_num_calls_in_priority_queue{job=~"$job", hbase_cluster=~"$hbase_cluster", instance=~"$instance"})', - datasource=promDatasource, - legendFormat='priority', - format='time_series', - ), - ], - type: 'timeseries', - title: 'Queued calls', - description: 'The number of calls waiting to be processed by the Region Server.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisBorderShow: false, - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 30, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - insertNulls: false, - lineInterpolation: 'smooth', - lineWidth: 2, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'never', - spanNulls: false, - stacking: { - group: 'A', - mode: 'normal', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - { - color: 'red', - value: 80, - }, - ], - }, - unit: 'none', - }, - overrides: [], - }, - options: { - legend: { - calcs: [ - 'min', - 'mean', - 'max', - ], - displayMode: 'table', - placement: 'right', - showLegend: true, - }, - tooltip: { - mode: 'multi', - sort: 'desc', - }, - }, -}; - -local slowOperationsPanel = { - datasource: 
promDatasource, - targets: [ - prometheus.target( - 'sum by(job, hbase_cluster) (rate(server_slow_append_count{job=~"$job", hbase_cluster=~"$hbase_cluster", instance=~"$instance"}[$__rate_interval]))', - datasource=promDatasource, - legendFormat='append', - format='time_series', - ), - prometheus.target( - 'sum by(job, hbase_cluster) (rate(server_slow_put_count{job=~"$job", hbase_cluster=~"$hbase_cluster", instance=~"$instance"}[$__rate_interval]))', - datasource=promDatasource, - legendFormat='put', - format='time_series', - ), - prometheus.target( - 'sum by(job, hbase_cluster) (rate(server_slow_delete_count{job=~"$job", hbase_cluster=~"$hbase_cluster", instance=~"$instance"}[$__rate_interval]))', - datasource=promDatasource, - legendFormat='delete', - format='time_series', - ), - prometheus.target( - 'sum by(job, hbase_cluster) (rate(server_slow_get_count{job=~"$job", hbase_cluster=~"$hbase_cluster", instance=~"$instance"}[$__rate_interval]))', - datasource=promDatasource, - legendFormat='get', - format='time_series', - ), - prometheus.target( - 'sum by(job, hbase_cluster) (rate(server_slow_increment_count{job=~"$job", hbase_cluster=~"$hbase_cluster", instance=~"$instance"}[$__rate_interval]))', - datasource=promDatasource, - legendFormat='increment', - format='time_series', - ), - ], - type: 'timeseries', - title: 'Slow operations', - description: 'The rate of operations that are slow, as determined by HBase.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisBorderShow: false, - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 30, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - insertNulls: false, - lineInterpolation: 'smooth', - lineWidth: 2, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'never', - spanNulls: false, - stacking: { - group: 'A', - 
mode: 'normal', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - { - color: 'red', - value: 80, - }, - ], - }, - unit: 'ops', - }, - overrides: [], - }, - options: { - legend: { - calcs: [ - 'min', - 'mean', - 'max', - ], - displayMode: 'table', - placement: 'right', - showLegend: true, - }, - tooltip: { - mode: 'multi', - sort: 'desc', - }, - }, -}; - -local cacheHitPercentagePanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'server_block_cache_express_hit_percent{job=~"$job", hbase_cluster=~"$hbase_cluster", instance=~"$instance"}', - datasource=promDatasource, - legendFormat='{{instance}}', - format='time_series', - ), - ], - type: 'timeseries', - title: 'Cache hit percentage', - description: 'The percent of time that requests hit the cache.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisBorderShow: false, - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 30, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - insertNulls: false, - lineInterpolation: 'smooth', - lineWidth: 2, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'never', - spanNulls: false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - max: 100, - min: 0, - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - { - color: 'red', - value: 80, - }, - ], - }, - unit: 'percent', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'multi', - sort: 'desc', - }, - }, -}; - -local authenticationsPanel = { - datasource: promDatasource, - targets: [ - 
prometheus.target( - 'sum by(job, hbase_cluster) (rate(region_server_authentication_successes{job=~"$job", hbase_cluster=~"$hbase_cluster", instance=~"$instance"}[$__rate_interval]))', - datasource=promDatasource, - legendFormat='success', - format='time_series', - ), - prometheus.target( - 'sum by(job, hbase_cluster) (rate(region_server_authentication_failures{job=~"$job", hbase_cluster=~"$hbase_cluster", instance=~"$instance"}[$__rate_interval]))', - datasource=promDatasource, - legendFormat='failure', - format='time_series', - ), - ], - type: 'timeseries', - title: 'Authentications', - description: 'The rate of successful and unsuccessful authentications.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisBorderShow: false, - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 30, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - insertNulls: false, - lineInterpolation: 'smooth', - lineWidth: 2, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'never', - spanNulls: false, - stacking: { - group: 'A', - mode: 'normal', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - { - color: 'red', - value: 80, - }, - ], - }, - unit: 'reqps', - }, - overrides: [], - }, - options: { - legend: { - calcs: [ - 'min', - 'mean', - 'max', - ], - displayMode: 'table', - placement: 'right', - showLegend: true, - }, - tooltip: { - mode: 'multi', - sort: 'desc', - }, - }, -}; - -{ - grafanaDashboards+:: { - 'apache-hbase-regionserver-overview.json': - dashboard.new( - 'Apache HBase RegionServer overview', - time_from=$._config.dashboardPeriod, - tags=($._config.dashboardTags), - timezone=$._config.dashboardTimezone, - refresh=$._config.dashboardRefresh, - description='', - 
uid=dashboardUid, - ) - .addLink(grafana.link.dashboards( - asDropdown=false, - title='Other Apache HBase dashboards', - includeVars=true, - keepTime=true, - tags=($._config.dashboardTags), - )) - .addTemplates( - [ - template.datasource( - promDatasourceName, - 'prometheus', - null, - label='Data Source', - refresh='load' - ), - template.new( - 'job', - promDatasource, - 'label_values(master_num_open_connections,job)', - label='Job', - refresh=2, - includeAll=true, - multi=true, - allValues='', - sort=0 - ), - template.new( - 'hbase_cluster', - promDatasource, - 'label_values(master_num_open_connections{job=~"$job"},hbase_cluster)', - label='Apache HBase cluster', - refresh=2, - includeAll=true, - multi=true, - allValues='', - sort=0 - ), - template.new( - 'instance', - promDatasource, - 'label_values(server_region_count{job=~"$job", hbase_cluster=~"$hbase_cluster"},instance)', - label='Instance', - refresh=2, - includeAll=true, - multi=true, - allValues='', - sort=0 - ), - ] - ) - .addPanels( - [ - regionsPanel { gridPos: { h: 8, w: 3, x: 0, y: 0 } }, - storeFilesPanel { gridPos: { h: 8, w: 3, x: 3, y: 0 } }, - storeFileSizePanel { gridPos: { h: 8, w: 3, x: 6, y: 0 } }, - rpcConnectionsPanel { gridPos: { h: 8, w: 3, x: 9, y: 0 } }, - jvmHeapMemoryUsagePanel { gridPos: { h: 8, w: 12, x: 12, y: 0 } }, - requestsReceivedPanel { gridPos: { h: 8, w: 16, x: 0, y: 8 } }, - requestsOverviewPanel { gridPos: { h: 8, w: 8, x: 16, y: 8 } }, - regionCountPanel { gridPos: { h: 8, w: 12, x: 0, y: 16 } }, - rpcConnectionCountPanel { gridPos: { h: 8, w: 12, x: 12, y: 16 } }, - storeFileCountPanel { gridPos: { h: 8, w: 12, x: 0, y: 24 } }, - storeFileSizeTimeseriesPanel { gridPos: { h: 8, w: 12, x: 12, y: 24 } }, - queuedCallsPanel { gridPos: { h: 8, w: 12, x: 0, y: 32 } }, - slowOperationsPanel { gridPos: { h: 8, w: 12, x: 12, y: 32 } }, - cacheHitPercentagePanel { gridPos: { h: 8, w: 12, x: 0, y: 40 } }, - authenticationsPanel { gridPos: { h: 8, w: 12, x: 12, y: 40 } }, - ] - ), 
- }, -} diff --git a/apache-hbase-mixin/dashboards/dashboards.libsonnet b/apache-hbase-mixin/dashboards/dashboards.libsonnet deleted file mode 100644 index ff96bcd4e..000000000 --- a/apache-hbase-mixin/dashboards/dashboards.libsonnet +++ /dev/null @@ -1,3 +0,0 @@ -(import 'apache-hbase-cluster-overview.libsonnet') + -(import 'apache-hbase-regionserver-overview.libsonnet') + -(import 'apache-hbase-logs-overview.libsonnet') diff --git a/apache-hbase-mixin/dashboards_out/apache-hbase-cluster-overview.json b/apache-hbase-mixin/dashboards_out/apache-hbase-cluster-overview.json index 972eb5d96..63836afe4 100644 --- a/apache-hbase-mixin/dashboards_out/apache-hbase-cluster-overview.json +++ b/apache-hbase-mixin/dashboards_out/apache-hbase-cluster-overview.json @@ -1,48 +1,54 @@ { - "__inputs": [ ], - "__requires": [ ], "annotations": { "list": [ ] }, - "description": "", - "editable": false, - "gnetId": null, - "graphTooltip": 0, - "hideControls": false, - "id": null, "links": [ { - "asDropdown": false, - "icon": "external link", - "includeVars": false, + "keepTime": true, + "title": "Apache HBase logs", + "type": "link", + "url": "/d/apachehbase_logs" + }, + { + "asDropdown": true, + "includeVars": true, "keepTime": true, "tags": [ "apache-hbase-mixin" ], - "targetBlank": false, - "title": "Other Apache HBase Dashboards", - "type": "dashboards", - "url": "" + "title": "All Apache HBase dashboards", + "type": "dashboards" + }, + { + "keepTime": true, + "title": "Apache HBase RegionServer overview", + "type": "link", + "url": "/d/apachehbase-regionserver-overview" } ], "panels": [ + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 0, + "x": 0, + "y": 0 + }, + "id": 1, + "panels": [ ], + "title": "Cluster overview", + "type": "row" + }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "datasource", + "uid": "-- Mixed --" }, "description": "Displays the current active and backup masters.", "fieldConfig": { "defaults": { - "color": { - "mode": 
"thresholds" - }, "custom": { - "fillOpacity": 70, - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, "lineWidth": 1 }, "mappings": [ @@ -63,115 +69,97 @@ } ], "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] + "steps": [ ] } - }, - "overrides": [ ] + } }, "gridPos": { - "h": 6, + "h": 8, "w": 24, "x": 0, - "y": 0 + "y": 1 }, "id": 2, "maxDataPoints": 100, "options": { - "colWidth": 0.90000000000000002, "legend": { "displayMode": "list", "placement": "bottom", "showLegend": true }, - "rowHeight": 0.90000000000000002, - "showValue": "never", - "tooltip": { - "mode": "multi", - "sort": "none" - } + "showValue": "never" }, + "pluginVersion": "v11.4.0", "targets": [ { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "max without (clusterid,deadregionservers,liveregionservers,servername,zookeeperquorum,isactivemaster) (server_num_region_servers{job=~\"$job\", hbase_cluster=~\"$hbase_cluster\", isactivemaster=\"true\"} * 0 + 1 )", + "expr": "max without (clusterid,deadregionservers,liveregionservers,servername,zookeeperquorum,isactivemaster) (server_num_region_servers{job=\"integrations/apache-hbase\",job=~\"$job\",hbase_cluster=~\"$hbase_cluster\",instance=~\"$instance\", isactivemaster=\"true\"} * 0 + 1 )", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}}" + "instant": false, + "legendFormat": "{{hbase_cluster}}", + "refId": "Master status history number of region servers" }, { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "(max without (clusterid,deadregionservers,liveregionservers,servername,zookeeperquorum,isactivemaster) (server_num_region_servers{job=~\"$job\", hbase_cluster=~\"$hbase_cluster\", isactivemaster=\"false\"}) * 0)", + "expr": "(max without (clusterid,deadregionservers,liveregionservers,servername,zookeeperquorum,isactivemaster) 
(server_num_region_servers{job=\"integrations/apache-hbase\",job=~\"$job\",hbase_cluster=~\"$hbase_cluster\",instance=~\"$instance\", isactivemaster=\"false\"}) * 0)", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}}" + "instant": false, + "legendFormat": "{{hbase_cluster}}", + "refId": "Non-master status history number of region servers" } ], "title": "Master status history", - "transformations": [ ], "type": "status-history" }, { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, "description": "Number of RegionServers that are currently live.", "fieldConfig": { "defaults": { "color": { - "mode": "thresholds" + "fixedColor": "text", + "mode": "fixed" }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - } - }, - "overrides": [ ] + "unit": "short" + } }, "gridPos": { "h": 8, "w": 5, "x": 0, - "y": 6 + "y": 9 }, "id": 3, "options": { - "colorMode": "value", + "colorMode": "fixed", "graphMode": "none", - "justifyMode": "auto", - "orientation": "auto", "reduceOptions": { "calcs": [ "lastNotNull" - ], - "fields": "", - "values": false - }, - "textMode": "value" + ] + } }, - "pluginVersion": "10.3.0-62488", + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "server_num_region_servers{job=~\"$job\", hbase_cluster=~\"$hbase_cluster\", isactivemaster=\"true\"}", + "expr": "server_num_region_servers{job=\"integrations/apache-hbase\",job=~\"$job\",hbase_cluster=~\"$hbase_cluster\",instance=~\"$instance\", isactivemaster=\"true\"}", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{hbase_cluster}}" + "instant": false, + "legendFormat": "{{hbase_cluster}}", + "refId": "Live RegionServers" } ], "title": "Live RegionServers", @@ -179,61 +167,59 @@ }, { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, "description": "Number of RegionServers 
that are currently dead.", "fieldConfig": { "defaults": { "color": { - "mode": "thresholds" + "fixedColor": "text", + "mode": "fixed" }, - "mappings": [ ], "thresholds": { - "mode": "absolute", "steps": [ { - "color": "green" + "color": "green", + "value": null }, { "color": "red", "value": 1 } ] - } - }, - "overrides": [ ] + }, + "unit": "short" + } }, "gridPos": { "h": 8, "w": 5, "x": 5, - "y": 6 + "y": 9 }, "id": 4, "options": { - "colorMode": "value", + "colorMode": "fixed", "graphMode": "none", - "justifyMode": "auto", - "orientation": "auto", "reduceOptions": { "calcs": [ "lastNotNull" - ], - "fields": "", - "values": false - }, - "textMode": "value" + ] + } }, - "pluginVersion": "10.3.0-62488", + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "server_num_dead_region_servers{job=~\"$job\", hbase_cluster=~\"$hbase_cluster\", isactivemaster=\"true\"}", + "expr": "server_num_dead_region_servers{job=\"integrations/apache-hbase\",job=~\"$job\",hbase_cluster=~\"$hbase_cluster\",instance=~\"$instance\", isactivemaster=\"true\"}", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{hbase_cluster}}" + "instant": false, + "legendFormat": "{{hbase_cluster}}", + "refId": "Dead RegionServers" } ], "title": "Dead RegionServers", @@ -241,32 +227,11 @@ }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "datasource", + "uid": "-- Mixed --" }, "description": "Servers for a cluster.", "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "align": "left", - "cellOptions": { - "type": "auto" - }, - "inspect": false - }, - "links": [ ], - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - } - }, "overrides": [ { "matcher": { @@ -279,13 +244,9 @@ "value": [ { "title": "", - "url": 
"/d/apache-hbase-regionserver-overview?from=${__from}&to=${__to}&var-instance=${__data.fields[\"RegionServer\"]}" + "url": "/d/apachehbase-regionserver-overview?from=${__from}&to=${__to}&var-instance=${__data.fields[\"RegionServer\"]}" } ] - }, - { - "id": "mappings", - "value": [ ] } ] }, @@ -326,43 +287,32 @@ "h": 8, "w": 14, "x": 10, - "y": 6 + "y": 9 }, "id": 5, - "options": { - "cellHeight": "md", - "footer": { - "countRows": false, - "fields": "", - "reducer": [ - "sum" - ], - "show": false - }, - "showHeader": true, - "sortBy": [ ] - }, - "pluginVersion": "10.3.0-62488", + "pluginVersion": "v11.4.0", "targets": [ { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "label_replace(server_num_region_servers{job=~\"$job\", hbase_cluster=~\"$hbase_cluster\"}, \"master_instance\", \"$1\", \"instance\", \"(.+)\")", + "expr": "label_replace(server_num_region_servers{job=\"integrations/apache-hbase\",job=~\"$job\",hbase_cluster=~\"$hbase_cluster\",instance=~\"$instance\"}, \"master_instance\", \"$1\", \"instance\", \"(.+)\")", "format": "table", "instant": true, - "intervalFactor": 2, - "legendFormat": "{{hbase_cluster}}" + "legendFormat": "{{hbase_cluster}}", + "refId": "Server live count" }, { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "label_replace(server_num_reference_files{job=~\"$job\", hbase_cluster=~\"$hbase_cluster\"}, \"region_server_instance\", \"$1\", \"instance\", \"(.+)\")", + "expr": "label_replace(server_num_reference_files{job=\"integrations/apache-hbase\",job=~\"$job\",hbase_cluster=~\"$hbase_cluster\",instance=~\"$instance\"}, \"region_server_instance\", \"$1\", \"instance\", \"(.+)\")", "format": "table", "instant": true, - "intervalFactor": 2, - "legendFormat": "{{hbase_cluster}}" + "legendFormat": "{{hbase_cluster}}", + "refId": "Region server list" } ], "title": "Servers", @@ -448,22 +398,17 @@ "type": "table" }, { - "datasource": { - "uid": 
"${prometheus_datasource}" - }, "description": "Panel to report on the status of integration alerts.", "gridPos": { "h": 8, "w": 12, "x": 0, - "y": 14 + "y": 17 }, "id": 6, "options": { "alertInstanceLabelFilter": "{job=~\"${job:regex}\", hbase_cluster=~\"${hbase_cluster:regex}\"}", - "alertName": "", "dashboardAlerts": false, - "folder": "", "groupBy": [ ], "groupMode": "default", "maxItems": 20, @@ -474,18 +419,19 @@ "noData": true, "normal": true, "pending": true - }, - "viewMode": "list" + } }, "targets": [ { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "master_num_open_connections{job=~\"$job\", hbase_cluster=~\"$hbase_cluster\"}", + "expr": "sum by(job,hbase_cluster) (master_num_open_connections{job=\"integrations/apache-hbase\",job=~\"$job\",hbase_cluster=~\"$hbase_cluster\",instance=~\"$instance\"})", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{hbase_cluster}}" + "instant": false, + "legendFormat": "{{hbase_cluster}} - masters", + "refId": "Master connections" } ], "title": "Alerts", @@ -493,6 +439,7 @@ }, { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, "description": "Heap memory usage for the JVM.", @@ -502,84 +449,47 @@ "mode": "continuous-BlYlRd" }, "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", "fillOpacity": 30, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, + "gradientMode": "scheme", "lineInterpolation": "smooth", "lineWidth": 2, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } + "showPoints": "never" }, - "mappings": [ ], + "decimals": 1, "max": 1, "min": 0, - "thresholds": { - 
"mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "red", - "value": 80 - } - ] - }, "unit": "percentunit" - }, - "overrides": [ ] + } }, "gridPos": { "h": 8, "w": 12, "x": 12, - "y": 14 + "y": 17 }, "id": 7, "options": { "legend": { "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true + "displayMode": "list" }, "tooltip": { "mode": "multi", "sort": "desc" } }, + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "jvm_metrics_mem_heap_used_m{job=~\"$job\", hbase_cluster=~\"$hbase_cluster\", processname=~\"Master\"} / clamp_min(jvm_metrics_mem_heap_committed_m{job=~\"$job\", hbase_cluster=~\"$hbase_cluster\", processname=~\"Master\"}, 1)", + "expr": "jvm_metrics_mem_heap_used_m{job=\"integrations/apache-hbase\",job=~\"$job\",hbase_cluster=~\"$hbase_cluster\",instance=~\"$instance\", processname=~\"Master\"} / clamp_min(jvm_metrics_mem_heap_committed_m{job=\"integrations/apache-hbase\",job=~\"$job\",hbase_cluster=~\"$hbase_cluster\",instance=~\"$instance\", processname=~\"Master\"}, 1)", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{hbase_cluster}} - {{instance}}" + "instant": false, + "legendFormat": "{{hbase_cluster}} - {{instance}}", + "refId": "Master JVM heap memory usage" } ], "title": "JVM heap memory usage", @@ -587,97 +497,67 @@ }, { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, "description": "Number of open connections to the cluster.", "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", "fillOpacity": 30, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, + "gradientMode": "opacity", 
"lineInterpolation": "smooth", "lineWidth": 2, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, "showPoints": "never", - "spanNulls": false, "stacking": { - "group": "A", "mode": "normal" - }, - "thresholdsStyle": { - "mode": "off" } }, "decimals": 0, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - }, - "unit": "none" - }, - "overrides": [ ] + "noValue": "No packets", + "unit": "short" + } }, "gridPos": { "h": 8, "w": 12, "x": 0, - "y": 22 + "y": 25 }, "id": 8, "options": { "legend": { "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true + "displayMode": "list" }, "tooltip": { "mode": "multi", "sort": "desc" } }, + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "sum by(job, hbase_cluster) (master_num_open_connections{job=~\"$job\", hbase_cluster=~\"$hbase_cluster\"})", + "expr": "sum by(job,hbase_cluster) (master_num_open_connections{job=\"integrations/apache-hbase\",job=~\"$job\",hbase_cluster=~\"$hbase_cluster\",instance=~\"$instance\"})", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{hbase_cluster}} - masters" + "instant": false, + "legendFormat": "{{hbase_cluster}} - masters", + "refId": "Master connections" }, { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "sum by(job, hbase_cluster) (region_server_num_open_connections{job=~\"$job\", hbase_cluster=~\"$hbase_cluster\"})", + "expr": "sum by(job,hbase_cluster) (region_server_num_open_connections{job=\"integrations/apache-hbase\",job=~\"$job\",hbase_cluster=~\"$hbase_cluster\",instance=~\"$instance\"})", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{hbase_cluster}} - RegionServers" + "instant": false, + "legendFormat": "{{hbase_cluster}} - RegionServers", + "refId": "RegionServer connections" } ], "title": "Connections", @@ -685,122 +565,90 
@@ }, { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, "description": "Volume of successful and unsuccessful authentications.", "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", "fillOpacity": 30, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, + "gradientMode": "opacity", "lineInterpolation": "smooth", "lineWidth": 2, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - }, - "thresholdsStyle": { - "mode": "off" - } + "showPoints": "never" }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "none" - }, - "overrides": [ ] + "unit": "reqps" + } }, "gridPos": { "h": 8, "w": 12, "x": 12, - "y": 22 + "y": 25 }, "id": 9, "options": { "legend": { + "asTable": true, "calcs": [ "min", "mean", "max" ], "displayMode": "table", - "placement": "right", - "showLegend": true + "placement": "right" }, "tooltip": { "mode": "multi", "sort": "desc" } }, + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "sum by(job, hbase_cluster) (rate(master_authentication_successes{job=~\"$job\", hbase_cluster=~\"$hbase_cluster\"}[$__rate_interval]))", + "expr": "sum by(job,hbase_cluster) (master_authentication_successes{job=\"integrations/apache-hbase\",job=~\"$job\",hbase_cluster=~\"$hbase_cluster\",instance=~\"$instance\"})", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{hbase_cluster}} - masters success" + "instant": false, + "legendFormat": 
"{{hbase_cluster}} - masters success", + "refId": "Master authentication successes" }, { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "sum by(job, hbase_cluster) (rate(master_authentication_failures{job=~\"$job\", hbase_cluster=~\"$hbase_cluster\"}[$__rate_interval]))", + "expr": "sum by(job,hbase_cluster) (master_authentication_failures{job=\"integrations/apache-hbase\",job=~\"$job\",hbase_cluster=~\"$hbase_cluster\",instance=~\"$instance\"})", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{hbase_cluster}} - masters failure" + "instant": false, + "legendFormat": "{{hbase_cluster}} - masters failure", + "refId": "Master authentication failures" }, { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "sum by(job, hbase_cluster) (rate(region_server_authentication_successes{job=~\"$job\", hbase_cluster=~\"$hbase_cluster\"}[$__rate_interval]))", + "expr": "sum by(job,hbase_cluster) (region_server_authentication_successes{job=\"integrations/apache-hbase\",job=~\"$job\",hbase_cluster=~\"$hbase_cluster\",instance=~\"$instance\"})", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{hbase_cluster}} - rs success" + "instant": false, + "legendFormat": "{{hbase_cluster}} - rs success", + "refId": "RegionServer authentication successes" }, { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "sum by(job, hbase_cluster) (rate(region_server_authentication_failures{job=~\"$job\", hbase_cluster=~\"$hbase_cluster\"}[$__rate_interval]))", + "expr": "sum by(job,hbase_cluster) (region_server_authentication_failures{job=\"integrations/apache-hbase\",job=~\"$job\",hbase_cluster=~\"$hbase_cluster\",instance=~\"$instance\"})", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{hbase_cluster}} - rs failure" + "instant": false, + "legendFormat": "{{hbase_cluster}} - rs failure", + "refId": "RegionServer authentication 
failures" } ], "title": "Authentications", @@ -808,91 +656,51 @@ }, { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, "description": "The size of the queue of requests, operations, and tasks to be processed by the master.", "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", "fillOpacity": 30, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, + "gradientMode": "opacity", "lineInterpolation": "smooth", "lineWidth": 2, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "red", - "value": 80 - } - ] + "showPoints": "never" }, "unit": "decbytes" - }, - "overrides": [ ] + } }, "gridPos": { "h": 8, "w": 12, "x": 0, - "y": 30 + "y": 33 }, "id": 10, "options": { "legend": { "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true + "displayMode": "list" }, "tooltip": { "mode": "multi", "sort": "desc" } }, + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "master_queue_size{job=~\"$job\", hbase_cluster=~\"$hbase_cluster\"}", + "expr": "master_queue_size{job=\"integrations/apache-hbase\",job=~\"$job\",hbase_cluster=~\"$hbase_cluster\",instance=~\"$instance\"}", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{hbase_cluster}} - {{instance}}" + "instant": false, + "legendFormat": "{{hbase_cluster}} - {{instance}}", + "refId": "Master queue size" } ], "title": "Master 
queue size", @@ -900,140 +708,112 @@ }, { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, "description": "The number of calls waiting to be processed by the master.", "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", "fillOpacity": 30, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, + "gradientMode": "opacity", "lineInterpolation": "smooth", "lineWidth": 2, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - }, - "thresholdsStyle": { - "mode": "off" - } + "showPoints": "never" }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "none" - }, - "overrides": [ ] + "unit": "short" + } }, "gridPos": { "h": 8, "w": 12, "x": 12, - "y": 30 + "y": 33 }, "id": 11, "options": { "legend": { + "asTable": true, "calcs": [ "min", "mean", "max" ], "displayMode": "table", - "placement": "right", - "showLegend": true + "placement": "right" }, "tooltip": { "mode": "multi", "sort": "desc" } }, + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "sum by(job, hbase_cluster) (master_num_calls_in_general_queue{job=~\"$job\", hbase_cluster=~\"$hbase_cluster\"})", + "expr": "sum by(job,hbase_cluster) (master_num_calls_in_general_queue{job=\"integrations/apache-hbase\",job=~\"$job\",hbase_cluster=~\"$hbase_cluster\",instance=~\"$instance\"})", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{hbase_cluster}} - general" + "instant": false, + "legendFormat": 
"{{hbase_cluster}} - general", + "refId": "Master calls in general queue" }, { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "sum by(job, hbase_cluster) (master_num_calls_in_replication_queue{job=~\"$job\", hbase_cluster=~\"$hbase_cluster\"})", + "expr": "sum by(job,hbase_cluster) (master_num_calls_in_replication_queue{job=\"integrations/apache-hbase\",job=~\"$job\",hbase_cluster=~\"$hbase_cluster\",instance=~\"$instance\"})", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{hbase_cluster}} - replication" + "instant": false, + "legendFormat": "{{hbase_cluster}} - replication", + "refId": "Master calls in replication queue" }, { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "sum by(job, hbase_cluster) (master_num_calls_in_read_queue{job=~\"$job\", hbase_cluster=~\"$hbase_cluster\"})", + "expr": "sum by(job,hbase_cluster) (master_num_calls_in_read_queue{job=\"integrations/apache-hbase\",job=~\"$job\",hbase_cluster=~\"$hbase_cluster\",instance=~\"$instance\"})", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{hbase_cluster}} - read" + "instant": false, + "legendFormat": "{{hbase_cluster}} - read", + "refId": "Master calls in read queue" }, { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "sum by(job, hbase_cluster) (master_num_calls_in_write_queue{job=~\"$job\", hbase_cluster=~\"$hbase_cluster\"})", + "expr": "sum by(job,hbase_cluster) (master_num_calls_in_write_queue{job=\"integrations/apache-hbase\",job=~\"$job\",hbase_cluster=~\"$hbase_cluster\",instance=~\"$instance\"})", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{hbase_cluster}} - write" + "instant": false, + "legendFormat": "{{hbase_cluster}} - write", + "refId": "Master calls in write queue" }, { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "sum by(job, hbase_cluster) 
(master_num_calls_in_scan_queue{job=~\"$job\", hbase_cluster=~\"$hbase_cluster\"})", + "expr": "sum by(job,hbase_cluster) (master_num_calls_in_scan_queue{job=\"integrations/apache-hbase\",job=~\"$job\",hbase_cluster=~\"$hbase_cluster\",instance=~\"$instance\"})", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{hbase_cluster}} - scan" + "instant": false, + "legendFormat": "{{hbase_cluster}} - scan", + "refId": "Master calls in scan queue" }, { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "sum by(job, hbase_cluster) (master_num_calls_in_priority_queue{job=~\"$job\", hbase_cluster=~\"$hbase_cluster\"})", + "expr": "sum by(job,hbase_cluster) (master_num_calls_in_priority_queue{job=\"integrations/apache-hbase\",job=~\"$job\",hbase_cluster=~\"$hbase_cluster\",instance=~\"$instance\"})", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{hbase_cluster}} - priority" + "instant": false, + "legendFormat": "{{hbase_cluster}} - priority", + "refId": "Master calls in priority queue" } ], "title": "Master queued calls", @@ -1041,101 +821,62 @@ }, { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, "description": "The number of regions in transition for the cluster.", "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", "fillOpacity": 30, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, + "gradientMode": "opacity", "lineInterpolation": "smooth", "lineWidth": 2, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } + "showPoints": "never" }, 
- "decimals": 0, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "none" - }, - "overrides": [ ] + "unit": "short" + } }, "gridPos": { "h": 8, "w": 12, "x": 0, - "y": 38 + "y": 41 }, "id": 12, "options": { "legend": { "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true + "displayMode": "list" }, "tooltip": { "mode": "multi", "sort": "desc" } }, + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "assignment_manager_rit_count{job=~\"$job\", hbase_cluster=~\"$hbase_cluster\"}", + "expr": "assignment_manager_rit_count{job=\"integrations/apache-hbase\",job=~\"$job\",hbase_cluster=~\"$hbase_cluster\",instance=~\"$instance\"}", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{hbase_cluster}}" + "instant": false, + "legendFormat": "{{hbase_cluster}}", + "refId": "Regions in transition" }, { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "assignment_manager_rit_count_over_threshold{job=~\"$job\", hbase_cluster=~\"$hbase_cluster\"}", + "expr": "assignment_manager_rit_count_over_threshold{job=\"integrations/apache-hbase\",job=~\"$job\",hbase_cluster=~\"$hbase_cluster\",instance=~\"$instance\"}", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{hbase_cluster}} - old" + "instant": false, + "legendFormat": "{{hbase_cluster}} - old", + "refId": "Old regions in transition" } ], "title": "Regions in transition", @@ -1143,91 +884,51 @@ }, { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, "description": "The age of the longest region in transition for the master of the cluster.", "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - 
"axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", "fillOpacity": 30, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, + "gradientMode": "opacity", "lineInterpolation": "smooth", "lineWidth": 2, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "red", - "value": 80 - } - ] + "showPoints": "never" }, "unit": "ms" - }, - "overrides": [ ] + } }, "gridPos": { "h": 8, "w": 12, "x": 12, - "y": 38 + "y": 41 }, "id": 13, "options": { "legend": { "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true + "displayMode": "list" }, "tooltip": { "mode": "multi", "sort": "desc" } }, + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "assignment_manager_rit_oldest_age{job=~\"$job\", hbase_cluster=~\"$hbase_cluster\"}", + "expr": "assignment_manager_rit_oldest_age{job=\"integrations/apache-hbase\",job=~\"$job\",hbase_cluster=~\"$hbase_cluster\",instance=~\"$instance\"}", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{hbase_cluster}}" + "instant": false, + "legendFormat": "{{hbase_cluster}}", + "refId": "Oldest region in transition age" } ], "title": "Oldest region in transition", @@ -1235,68 +936,71 @@ } ], "refresh": "1m", - "rows": [ ], - "schemaVersion": 14, - "style": "dark", + "schemaVersion": 39, "tags": [ "apache-hbase-mixin" ], "templating": { "list": [ { - "current": { }, - "hide": 0, - "label": "Data Source", + "label": "Prometheus data source", "name": "prometheus_datasource", - "options": [ ], "query": "prometheus", - "refresh": 1, "regex": "", "type": 
"datasource" }, { - "allValue": "", - "current": { }, + "allValue": ".+", "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "hide": 0, "includeAll": true, "label": "Job", "multi": true, "name": "job", - "options": [ ], - "query": "label_values(master_num_open_connections,job)", + "query": "label_values(server_num_region_servers{job=\"integrations/apache-hbase\"}, job)", "refresh": 2, - "regex": "", - "sort": 0, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false + "sort": 1, + "type": "query" }, { - "allValue": "", - "current": { }, + "allValue": ".+", "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "hide": 0, "includeAll": true, - "label": "HBase cluster", + "label": "Apache HBase cluster", "multi": true, "name": "hbase_cluster", - "options": [ ], - "query": "label_values(master_num_open_connections{job=~\"$job\"},hbase_cluster)", + "query": "label_values(server_num_region_servers{job=\"integrations/apache-hbase\",job=~\"$job\"}, hbase_cluster)", + "refresh": 2, + "sort": 1, + "type": "query" + }, + { + "allValue": ".+", + "datasource": { + "type": "prometheus", + "uid": "${prometheus_datasource}" + }, + "includeAll": true, + "label": "Instance", + "multi": true, + "name": "instance", + "query": "label_values(server_num_region_servers{job=\"integrations/apache-hbase\",job=~\"$job\",hbase_cluster=~\"$hbase_cluster\"}, instance)", "refresh": 2, + "sort": 1, + "type": "query" + }, + { + "hide": 2, + "label": "Loki data source", + "name": "loki_datasource", + "query": "loki", "regex": "", - "sort": 0, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false + "type": "datasource" } ] }, @@ -1304,33 +1008,7 @@ "from": "now-30m", "to": "now" }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - 
"12h", - "24h", - "2d", - "7d", - "30d" - ] - }, "timezone": "default", "title": "Apache HBase cluster overview", - "uid": "apache-hbase-cluster-overview", - "version": 0 + "uid": "apachehbase-cluster-overview" } \ No newline at end of file diff --git a/apache-hbase-mixin/dashboards_out/apache-hbase-logs.json b/apache-hbase-mixin/dashboards_out/apache-hbase-logs-overview.json similarity index 82% rename from apache-hbase-mixin/dashboards_out/apache-hbase-logs.json rename to apache-hbase-mixin/dashboards_out/apache-hbase-logs-overview.json index d80fc003a..121214aee 100644 --- a/apache-hbase-mixin/dashboards_out/apache-hbase-logs.json +++ b/apache-hbase-mixin/dashboards_out/apache-hbase-logs-overview.json @@ -1,17 +1,29 @@ { + "annotations": { + "list": [ ] + }, "links": [ { - "asDropdown": false, - "icon": "external link", - "includeVars": false, + "keepTime": true, + "title": "Apache HBase cluster overview", + "type": "link", + "url": "/d/apachehbase-cluster-overview" + }, + { + "asDropdown": true, + "includeVars": true, "keepTime": true, "tags": [ "apache-hbase-mixin" ], - "targetBlank": false, - "title": "Other Apache HBase Dashboards", - "type": "dashboards", - "url": "" + "title": "All Apache HBase dashboards", + "type": "dashboards" + }, + { + "keepTime": true, + "title": "Apache HBase RegionServer overview", + "type": "link", + "url": "/d/apachehbase-regionserver-overview" } ], "panels": [ @@ -161,7 +173,7 @@ "type": "loki", "uid": "${loki_datasource}" }, - "expr": "sum by (level) (count_over_time({job=~\"integrations/apache-hbase\",job=~\"$job\",hbase_cluster=~\"$hbase_cluster\",instance=~\"$instance\",logger=~\"$logger\",level=~\"$level\"}\n|~ \"$regex_search\"\n\n[$__auto]))\n", + "expr": "sum by (level) (count_over_time({job=\"integrations/apache-hbase\",job=~\"$job\",hbase_cluster=~\"$hbase_cluster\",instance=~\"$instance\",level=~\"$level\"}\n|~ \"$regex_search\"\n\n[$__auto]))\n", "legendFormat": "{{ level }}" } ], @@ -194,7 +206,7 @@ 
"enableLogDetails": true, "prettifyLogMessage": true, "showTime": false, - "wrapLogMessage": true + "wrapLogMessage": false }, "pluginVersion": "v11.0.0", "targets": [ @@ -203,7 +215,7 @@ "type": "loki", "uid": "${loki_datasource}" }, - "expr": "{job=~\"integrations/apache-hbase\",job=~\"$job\",hbase_cluster=~\"$hbase_cluster\",instance=~\"$instance\",logger=~\"$logger\",level=~\"$level\"} \n|~ \"$regex_search\"\n\n\n" + "expr": "{job=\"integrations/apache-hbase\",job=~\"$job\",hbase_cluster=~\"$hbase_cluster\",instance=~\"$instance\",level=~\"$level\"} \n|~ \"$regex_search\"\n\n\n" } ], "title": "Logs", @@ -234,7 +246,7 @@ "label": "Job", "multi": true, "name": "job", - "query": "label_values({job=~\"integrations/apache-hbase\"}, job)", + "query": "label_values({job=\"integrations/apache-hbase\"}, job)", "refresh": 2, "sort": 1, "type": "query" @@ -246,10 +258,10 @@ "uid": "${loki_datasource}" }, "includeAll": true, - "label": "Hbase_cluster", + "label": "Apache HBase cluster", "multi": true, "name": "hbase_cluster", - "query": "label_values({job=~\"integrations/apache-hbase\",job=~\"$job\"}, hbase_cluster)", + "query": "label_values({job=\"integrations/apache-hbase\",job=~\"$job\"}, hbase_cluster)", "refresh": 2, "sort": 1, "type": "query" @@ -264,22 +276,7 @@ "label": "Instance", "multi": true, "name": "instance", - "query": "label_values({job=~\"integrations/apache-hbase\",job=~\"$job\",hbase_cluster=~\"$hbase_cluster\"}, instance)", - "refresh": 2, - "sort": 1, - "type": "query" - }, - { - "allValue": ".*", - "datasource": { - "type": "loki", - "uid": "${loki_datasource}" - }, - "includeAll": true, - "label": "Logger", - "multi": true, - "name": "logger", - "query": "label_values({job=~\"integrations/apache-hbase\",job=~\"$job\",hbase_cluster=~\"$hbase_cluster\",instance=~\"$instance\"}, logger)", + "query": "label_values({job=\"integrations/apache-hbase\",job=~\"$job\",hbase_cluster=~\"$hbase_cluster\"}, instance)", "refresh": 2, "sort": 1, "type": "query" @@ 
-294,7 +291,7 @@ "label": "Level", "multi": true, "name": "level", - "query": "label_values({job=~\"integrations/apache-hbase\",job=~\"$job\",hbase_cluster=~\"$hbase_cluster\",instance=~\"$instance\",logger=~\"$logger\"}, level)", + "query": "label_values({job=\"integrations/apache-hbase\",job=~\"$job\",hbase_cluster=~\"$hbase_cluster\",instance=~\"$instance\"}, level)", "refresh": 2, "sort": 1, "type": "query" @@ -316,14 +313,22 @@ ], "query": "", "type": "textbox" + }, + { + "hide": 2, + "label": "Prometheus data source", + "name": "prometheus_datasource", + "query": "prometheus", + "regex": "", + "type": "datasource" } ] }, "time": { - "from": "now-6h", + "from": "now-30m", "to": "now" }, - "timezone": "utc", - "title": "Apache HBase logs overview", - "uid": "apache-hbase-logs-overview" + "timezone": "default", + "title": "Apache HBase logs", + "uid": "apachehbase_logs" } \ No newline at end of file diff --git a/apache-hbase-mixin/dashboards_out/apache-hbase-regionserver-overview.json b/apache-hbase-mixin/dashboards_out/apache-hbase-regionserver-overview.json index bb10b975b..5116eeacc 100644 --- a/apache-hbase-mixin/dashboards_out/apache-hbase-regionserver-overview.json +++ b/apache-hbase-mixin/dashboards_out/apache-hbase-regionserver-overview.json @@ -1,91 +1,88 @@ { - "__inputs": [ ], - "__requires": [ ], "annotations": { "list": [ ] }, - "description": "", - "editable": false, - "gnetId": null, - "graphTooltip": 0, - "hideControls": false, - "id": null, "links": [ { - "asDropdown": false, - "icon": "external link", + "keepTime": true, + "title": "Apache HBase cluster overview", + "type": "link", + "url": "/d/apachehbase-cluster-overview" + }, + { + "keepTime": true, + "title": "Apache HBase logs", + "type": "link", + "url": "/d/apachehbase_logs" + }, + { + "asDropdown": true, "includeVars": true, "keepTime": true, "tags": [ "apache-hbase-mixin" ], - "targetBlank": false, - "title": "Other Apache HBase dashboards", - "type": "dashboards", - "url": "" + 
"title": "All Apache HBase dashboards", + "type": "dashboards" } ], "panels": [ + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 0, + "x": 0, + "y": 0 + }, + "id": 1, + "panels": [ ], + "title": "RegionServer overview", + "type": "row" + }, { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "description": "The number of regions hosted by the Region Server.", + "description": "The number of regions hosted by the RegionServer.", "fieldConfig": { "defaults": { "color": { - "mode": "thresholds" - }, - "decimals": 0, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "fixedColor": "text", + "mode": "fixed" }, - "unit": "none" - }, - "overrides": [ ] + "unit": "short" + } }, "gridPos": { "h": 8, "w": 3, "x": 0, - "y": 0 + "y": 1 }, "id": 2, "options": { - "colorMode": "value", + "colorMode": "fixed", "graphMode": "none", - "justifyMode": "auto", - "orientation": "auto", "reduceOptions": { "calcs": [ "lastNotNull" - ], - "fields": "", - "values": false - }, - "textMode": "auto" + ] + } }, - "pluginVersion": "10.3.0-62488", + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "sum by(job, hbase_cluster) (server_region_count{job=~\"$job\", hbase_cluster=~\"$hbase_cluster\", instance=~\"$instance\"})", + "expr": "server_region_count{job=\"integrations/apache-hbase\",job=~\"$job\",hbase_cluster=~\"$hbase_cluster\",instance=~\"$instance\"}", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}}" + "instant": false, + "legendFormat": "{{instance}}", + "refId": "Region count" } ], "title": "Regions", @@ -93,64 +90,47 @@ }, { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "description": "The number of store files on disk currently managed by the Region Server.", + "description": "The number of 
store files on disk currently managed by the RegionServer.", "fieldConfig": { "defaults": { "color": { - "mode": "thresholds" + "fixedColor": "text", + "mode": "fixed" }, - "decimals": 0, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "none" - }, - "overrides": [ ] + "unit": "short" + } }, "gridPos": { "h": 8, "w": 3, "x": 3, - "y": 0 + "y": 1 }, "id": 3, "options": { - "colorMode": "value", + "colorMode": "fixed", "graphMode": "none", - "justifyMode": "auto", - "orientation": "auto", "reduceOptions": { "calcs": [ "lastNotNull" - ], - "fields": "", - "values": false - }, - "textMode": "auto" + ] + } }, - "pluginVersion": "10.3.0-62488", + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "sum by(job, hbase_cluster) (server_store_file_count{job=~\"$job\", hbase_cluster=~\"$hbase_cluster\", instance=~\"$instance\"})", + "expr": "server_store_file_count{job=\"integrations/apache-hbase\",job=~\"$job\",hbase_cluster=~\"$hbase_cluster\",instance=~\"$instance\"}", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}}" + "instant": false, + "legendFormat": "{{instance}}", + "refId": "Store file count" } ], "title": "Store files", @@ -158,121 +138,95 @@ }, { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "description": "The total size of the store files on disk managed by the Region Server.", + "description": "The total size of the store files on disk managed by the RegionServer.", "fieldConfig": { "defaults": { "color": { - "mode": "thresholds" - }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] + "fixedColor": "text", + "mode": "fixed" }, "unit": "decbytes" - }, - "overrides": [ ] + } }, "gridPos": { "h": 8, "w": 3, "x": 6, - "y": 0 + 
"y": 1 }, "id": 4, "options": { - "displayMode": "gradient", - "minVizHeight": 10, - "minVizWidth": 0, - "namePlacement": "auto", - "orientation": "vertical", + "colorMode": "fixed", + "graphMode": "none", "reduceOptions": { "calcs": [ "lastNotNull" - ], - "fields": "", - "values": false - }, - "showUnfilled": true, - "valueMode": "color" + ] + } }, - "pluginVersion": "10.3.0-62488", + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "sum by(job, hbase_cluster) (server_store_file_size{job=~\"$job\", hbase_cluster=~\"$hbase_cluster\", instance=~\"$instance\"})", + "expr": "server_store_file_size{job=\"integrations/apache-hbase\",job=~\"$job\",hbase_cluster=~\"$hbase_cluster\",instance=~\"$instance\"}", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}}" + "instant": false, + "legendFormat": "{{instance}}", + "refId": "Store file size" } ], "title": "Store file size", - "type": "bargauge" + "type": "stat" }, { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "description": "The number of open connections to the Region Server.", + "description": "The number of open connections to the RegionServer.", "fieldConfig": { "defaults": { "color": { - "mode": "thresholds" + "fixedColor": "text", + "mode": "fixed" }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] - }, - "unit": "none" - }, - "overrides": [ ] + "unit": "short" + } }, "gridPos": { "h": 8, "w": 3, "x": 9, - "y": 0 + "y": 1 }, "id": 5, "options": { - "colorMode": "value", + "colorMode": "fixed", "graphMode": "none", - "justifyMode": "auto", - "orientation": "auto", "reduceOptions": { "calcs": [ "lastNotNull" - ], - "fields": "", - "values": false - }, - "textMode": "auto" + ] + } }, - "pluginVersion": "10.3.0-62488", + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { + "type": "prometheus", 
"uid": "${prometheus_datasource}" }, - "expr": "sum by(job, hbase_cluster) (region_server_num_open_connections{job=~\"$job\", hbase_cluster=~\"$hbase_cluster\", instance=~\"$instance\"})", + "expr": "region_server_num_open_connections{job=\"integrations/apache-hbase\",job=~\"$job\",hbase_cluster=~\"$hbase_cluster\",instance=~\"$instance\"}", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}}" + "instant": false, + "legendFormat": "{{instance}}", + "refId": "RPC connections" } ], "title": "RPC connections", @@ -280,94 +234,53 @@ }, { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, "description": "Heap memory usage for the JVM.", "fieldConfig": { "defaults": { - "color": { - "mode": "continuous-BlYlRd" - }, "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", "fillOpacity": 30, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, + "gradientMode": "opacity", "lineInterpolation": "smooth", "lineWidth": 2, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } + "showPoints": "never" }, - "mappings": [ ], "max": 1, "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, "unit": "percentunit" - }, - "overrides": [ ] + } }, "gridPos": { "h": 8, "w": 12, "x": 12, - "y": 0 + "y": 1 }, "id": 6, "options": { "legend": { "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true + "displayMode": "list" }, "tooltip": { "mode": "multi", "sort": "desc" } }, + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { + "type": "prometheus", 
"uid": "${prometheus_datasource}" }, - "expr": "jvm_metrics_mem_heap_used_m{job=~\"$job\", hbase_cluster=~\"$hbase_cluster\", instance=~\"$instance\", processname=\"RegionServer\"} / clamp_min(jvm_metrics_mem_heap_committed_m{job=~\"$job\", hbase_cluster=~\"$hbase_cluster\", instance=~\"$instance\", processname=\"RegionServer\"}, 1)", + "expr": "jvm_metrics_mem_heap_used_m{job=\"integrations/apache-hbase\",job=~\"$job\",hbase_cluster=~\"$hbase_cluster\",instance=~\"$instance\", processname=\"RegionServer\"} / clamp_min(jvm_metrics_mem_heap_committed_m{job=\"integrations/apache-hbase\",job=~\"$job\",hbase_cluster=~\"$hbase_cluster\",instance=~\"$instance\", processname=\"RegionServer\"}, 1)", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}}" + "instant": false, + "legendFormat": "{{instance}}", + "refId": "RegionServer JVM heap memory usage" } ], "title": "JVM heap memory usage", @@ -375,92 +288,60 @@ }, { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "description": "The rate of requests received by the Region Server.", + "description": "The rate of requests received by the RegionServer.", "fieldConfig": { "defaults": { "color": { - "mode": "palette-classic" + "fixedColor": "light-purple", + "mode": "fixed" }, "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 30, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, + "drawStyle": "bars", + "fillOpacity": 100, + "gradientMode": "opacity", "lineInterpolation": "smooth", "lineWidth": 2, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, "showPoints": "never", - "spanNulls": false, "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" + "mode": "normal" } }, - "mappings": [ ], - 
"thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, "unit": "reqps" - }, - "overrides": [ ] + } }, "gridPos": { "h": 8, "w": 16, "x": 0, - "y": 8 + "y": 9 }, "id": 7, + "maxDataPoints": 100, "options": { "legend": { "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true + "displayMode": "list" }, "tooltip": { "mode": "multi", "sort": "desc" } }, + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "rate(server_total_request_count{job=~\"$job\", hbase_cluster=~\"$hbase_cluster\", instance=~\"$instance\"}[$__rate_interval])", + "expr": "rate(server_total_request_count{job=\"integrations/apache-hbase\",job=~\"$job\",hbase_cluster=~\"$hbase_cluster\",instance=~\"$instance\"}[$__rate_interval])", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}}" + "instant": false, + "legendFormat": "{{instance}}", + "refId": "Total requests" } ], "title": "Requests received", @@ -468,31 +349,20 @@ }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "datasource", + "uid": "-- Mixed --" }, - "description": "Requests received by the Region Server, broken down by type.", + "description": "Requests received by the RegionServer, broken down by type.", "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - } - }, - "mappings": [ ], "unit": "reqps" - }, - "overrides": [ ] + } }, "gridPos": { "h": 8, "w": 8, "x": 16, - "y": 8 + "y": 9 }, "id": 8, "options": { @@ -505,96 +375,112 @@ "reduceOptions": { "calcs": [ "lastNotNull" - ], - "fields": "", - "values": false + ] }, "tooltip": { - "mode": "multi", - "sort": "desc" + "mode": "multi" } }, + "pluginVersion": "v11.4.0", "targets": [ { "datasource": { + "type": "prometheus", "uid": 
"${prometheus_datasource}" }, - "expr": "sum by(job, hbase_cluster) (rate(server_read_request_count{job=~\"$job\", hbase_cluster=~\"$hbase_cluster\", instance=~\"$instance\"}[$__rate_interval]))", + "expr": "sum by(job,hbase_cluster) (server_read_request_count{job=\"integrations/apache-hbase\",job=~\"$job\",hbase_cluster=~\"$hbase_cluster\",instance=~\"$instance\"})", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "read" + "instant": false, + "legendFormat": "read", + "refId": "Read requests" }, { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "sum by(job, hbase_cluster) (rate(server_write_request_count{job=~\"$job\", hbase_cluster=~\"$hbase_cluster\", instance=~\"$instance\"}[$__rate_interval]))", + "expr": "sum by(job,hbase_cluster) (server_write_request_count{job=\"integrations/apache-hbase\",job=~\"$job\",hbase_cluster=~\"$hbase_cluster\",instance=~\"$instance\"})", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "write" + "instant": false, + "legendFormat": "write", + "refId": "Write requests" }, { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "sum by(job, hbase_cluster) (rate(server_cp_request_count{job=~\"$job\", hbase_cluster=~\"$hbase_cluster\", instance=~\"$instance\"}[$__rate_interval]))", + "expr": "sum by(job,hbase_cluster) (server_cp_request_count{job=\"integrations/apache-hbase\",job=~\"$job\",hbase_cluster=~\"$hbase_cluster\",instance=~\"$instance\"})", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "copy" + "instant": false, + "legendFormat": "copy", + "refId": "Copy requests" }, { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "sum by(job, hbase_cluster) (rate(server_filtered_read_request_count{job=~\"$job\", hbase_cluster=~\"$hbase_cluster\", instance=~\"$instance\"}[$__rate_interval]))", + "expr": "sum by(job,hbase_cluster) 
(server_filtered_read_request_count{job=\"integrations/apache-hbase\",job=~\"$job\",hbase_cluster=~\"$hbase_cluster\",instance=~\"$instance\"})", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "filtered read" + "instant": false, + "legendFormat": "filtered read", + "refId": "Filtered read requests" }, { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "sum by(job, hbase_cluster) (rate(server_rpc_get_request_count{job=~\"$job\", hbase_cluster=~\"$hbase_cluster\", instance=~\"$instance\"}[$__rate_interval]))", + "expr": "sum by(job,hbase_cluster) (server_rpc_get_request_count{job=\"integrations/apache-hbase\",job=~\"$job\",hbase_cluster=~\"$hbase_cluster\",instance=~\"$instance\"})", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "rpc get" + "instant": false, + "legendFormat": "rpc get", + "refId": "RPC get requests" }, { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "sum by(job, hbase_cluster) (rate(server_rpc_scan_request_count{job=~\"$job\", hbase_cluster=~\"$hbase_cluster\", instance=~\"$instance\"}[$__rate_interval]))", + "expr": "sum by(job,hbase_cluster) (server_rpc_scan_request_count{job=\"integrations/apache-hbase\",job=~\"$job\",hbase_cluster=~\"$hbase_cluster\",instance=~\"$instance\"})", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "rpc scan" + "instant": false, + "legendFormat": "rpc scan", + "refId": "RPC scan requests" }, { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "sum by(job, hbase_cluster) (rate(server_rpc_full_scan_request_count{job=~\"$job\", hbase_cluster=~\"$hbase_cluster\", instance=~\"$instance\"}[$__rate_interval]))", + "expr": "sum by(job,hbase_cluster) (server_rpc_full_scan_request_count{job=\"integrations/apache-hbase\",job=~\"$job\",hbase_cluster=~\"$hbase_cluster\",instance=~\"$instance\"})", "format": "time_series", - "intervalFactor": 2, - "legendFormat": 
"rpc full scan" + "instant": false, + "legendFormat": "rpc full scan", + "refId": "RPC full scan requests" }, { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "sum by(job, hbase_cluster) (rate(server_rpc_mutate_request_count{job=~\"$job\", hbase_cluster=~\"$hbase_cluster\", instance=~\"$instance\"}[$__rate_interval]))", + "expr": "sum by(job,hbase_cluster) (server_rpc_mutate_request_count{job=\"integrations/apache-hbase\",job=~\"$job\",hbase_cluster=~\"$hbase_cluster\",instance=~\"$instance\"})", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "rpc mutate" + "instant": false, + "legendFormat": "rpc mutate", + "refId": "RPC mutate requests" }, { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "sum by(job, hbase_cluster) (rate(server_rpc_multi_request_count{job=~\"$job\", hbase_cluster=~\"$hbase_cluster\", instance=~\"$instance\"}[$__rate_interval]))", + "expr": "sum by(job,hbase_cluster) (server_rpc_multi_request_count{job=\"integrations/apache-hbase\",job=~\"$job\",hbase_cluster=~\"$hbase_cluster\",instance=~\"$instance\"})", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "rpc multi" + "instant": false, + "legendFormat": "rpc multi", + "refId": "RPC multi requests" } ], "title": "Requests overview", @@ -602,93 +488,51 @@ }, { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "description": "The number of regions hosted by the Region Server.", + "description": "The number of regions hosted by the RegionServer.", "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", "fillOpacity": 30, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, + 
"gradientMode": "opacity", "lineInterpolation": "smooth", "lineWidth": 2, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } + "showPoints": "never" }, - "decimals": 0, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "none" - }, - "overrides": [ ] + "unit": "short" + } }, "gridPos": { "h": 8, "w": 12, "x": 0, - "y": 16 + "y": 17 }, "id": 9, "options": { "legend": { "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true + "displayMode": "list" }, "tooltip": { "mode": "multi", "sort": "desc" } }, + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "server_region_count{job=~\"$job\", hbase_cluster=~\"$hbase_cluster\", instance=~\"$instance\"}", + "expr": "server_region_count{job=\"integrations/apache-hbase\",job=~\"$job\",hbase_cluster=~\"$hbase_cluster\",instance=~\"$instance\"}", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}}" + "instant": false, + "legendFormat": "{{instance}}", + "refId": "Region count" } ], "title": "Region count", @@ -696,89 +540,56 @@ }, { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "description": "The number of open connections to the Region Server.", + "description": "The number of open connections to the RegionServer.", "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", "fillOpacity": 30, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": 
false - }, - "insertNulls": false, + "gradientMode": "opacity", "lineInterpolation": "smooth", "lineWidth": 2, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, "showPoints": "never", - "spanNulls": false, "stacking": { - "group": "A", "mode": "normal" - }, - "thresholdsStyle": { - "mode": "off" } }, - "decimals": 0, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] - }, - "unit": "none" - }, - "overrides": [ ] + "decimals": 1, + "noValue": "No packets", + "unit": "short" + } }, "gridPos": { "h": 8, "w": 12, "x": 12, - "y": 16 + "y": 17 }, "id": 10, "options": { "legend": { "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true + "displayMode": "list" }, "tooltip": { "mode": "multi", "sort": "desc" } }, + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "region_server_num_open_connections{job=~\"$job\", hbase_cluster=~\"$hbase_cluster\", instance=~\"$instance\"}", + "expr": "region_server_num_open_connections{job=\"integrations/apache-hbase\",job=~\"$job\",hbase_cluster=~\"$hbase_cluster\",instance=~\"$instance\"}", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}}" + "instant": false, + "legendFormat": "{{instance}}", + "refId": "RPC connections" } ], "title": "RPC connection count", @@ -786,93 +597,51 @@ }, { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "description": "The number of store files on disk currently managed by the Region Server.", + "description": "The number of store files on disk currently managed by the RegionServer.", "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", "fillOpacity": 
30, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, + "gradientMode": "opacity", "lineInterpolation": "smooth", "lineWidth": 2, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } + "showPoints": "never" }, - "decimals": 0, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "none" - }, - "overrides": [ ] + "unit": "short" + } }, "gridPos": { "h": 8, "w": 12, "x": 0, - "y": 24 + "y": 25 }, "id": 11, "options": { "legend": { "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true + "displayMode": "list" }, "tooltip": { "mode": "multi", "sort": "desc" } }, + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "server_store_file_count{job=~\"$job\", hbase_cluster=~\"$hbase_cluster\", instance=~\"$instance\"}", + "expr": "server_store_file_count{job=\"integrations/apache-hbase\",job=~\"$job\",hbase_cluster=~\"$hbase_cluster\",instance=~\"$instance\"}", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}}" + "instant": false, + "legendFormat": "{{instance}}", + "refId": "Store file count" } ], "title": "Store file count", @@ -880,92 +649,52 @@ }, { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "description": "The total size of the store files on disk managed by the Region Server.", + "description": "The total size of the store files on disk managed by the RegionServer.", "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - 
"axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", "fillOpacity": 30, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, + "gradientMode": "opacity", "lineInterpolation": "smooth", "lineWidth": 2, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "spanNulls": false }, "unit": "decbytes" - }, - "overrides": [ ] + } }, "gridPos": { "h": 8, "w": 12, "x": 12, - "y": 24 + "y": 25 }, "id": 12, "options": { "legend": { "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true + "displayMode": "list" }, "tooltip": { "mode": "multi", "sort": "desc" } }, + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "server_store_file_size{job=~\"$job\", hbase_cluster=~\"$hbase_cluster\", instance=~\"$instance\"}", + "expr": "server_store_file_size{job=\"integrations/apache-hbase\",job=~\"$job\",hbase_cluster=~\"$hbase_cluster\",instance=~\"$instance\"}", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}}" + "instant": false, + "legendFormat": "{{instance}}", + "refId": "Store file size" } ], "title": "Store file size", @@ -973,141 +702,112 @@ }, { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "description": "The number of calls waiting to be processed by the Region Server.", + "description": "The number of calls waiting to be processed by the RegionServer.", "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, "custom": { - "axisBorderShow": false, - 
"axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", "fillOpacity": 30, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, + "gradientMode": "opacity", "lineInterpolation": "smooth", "lineWidth": 2, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "showPoints": "never" }, - "unit": "none" - }, - "overrides": [ ] + "unit": "short" + } }, "gridPos": { "h": 8, "w": 12, "x": 0, - "y": 32 + "y": 33 }, "id": 13, "options": { "legend": { + "asTable": true, "calcs": [ "min", "mean", "max" ], "displayMode": "table", - "placement": "right", - "showLegend": true + "placement": "right" }, "tooltip": { "mode": "multi", "sort": "desc" } }, + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "sum by(job, hbase_cluster) (region_server_num_calls_in_general_queue{job=~\"$job\", hbase_cluster=~\"$hbase_cluster\", instance=~\"$instance\"})", + "expr": "sum by(job,hbase_cluster) (region_server_num_calls_in_general_queue{job=\"integrations/apache-hbase\",job=~\"$job\",hbase_cluster=~\"$hbase_cluster\",instance=~\"$instance\"})", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "general" + "instant": false, + "legendFormat": "general", + "refId": "Calls in general queue" }, { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "sum by(job, hbase_cluster) (region_server_num_calls_in_replication_queue{job=~\"$job\", hbase_cluster=~\"$hbase_cluster\", 
instance=~\"$instance\"})", + "expr": "sum by(job,hbase_cluster) (region_server_num_calls_in_replication_queue{job=\"integrations/apache-hbase\",job=~\"$job\",hbase_cluster=~\"$hbase_cluster\",instance=~\"$instance\"})", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "replication" + "instant": false, + "legendFormat": "replication", + "refId": "Calls in replication queue" }, { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "sum by(job, hbase_cluster) (region_server_num_calls_in_read_queue{job=~\"$job\", hbase_cluster=~\"$hbase_cluster\", instance=~\"$instance\"})", + "expr": "sum by(job,hbase_cluster) (region_server_num_calls_in_read_queue{job=\"integrations/apache-hbase\",job=~\"$job\",hbase_cluster=~\"$hbase_cluster\",instance=~\"$instance\"})", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "read" + "instant": false, + "legendFormat": "read", + "refId": "Calls in read queue" }, { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "sum by(job, hbase_cluster) (region_server_num_calls_in_write_queue{job=~\"$job\", hbase_cluster=~\"$hbase_cluster\", instance=~\"$instance\"})", + "expr": "sum by(job,hbase_cluster) (region_server_num_calls_in_write_queue{job=\"integrations/apache-hbase\",job=~\"$job\",hbase_cluster=~\"$hbase_cluster\",instance=~\"$instance\"})", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "write" + "instant": false, + "legendFormat": "write", + "refId": "Calls in write queue" }, { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "sum by(job, hbase_cluster) (region_server_num_calls_in_scan_queue{job=~\"$job\", hbase_cluster=~\"$hbase_cluster\", instance=~\"$instance\"})", + "expr": "sum by(job,hbase_cluster) (region_server_num_calls_in_scan_queue{job=\"integrations/apache-hbase\",job=~\"$job\",hbase_cluster=~\"$hbase_cluster\",instance=~\"$instance\"})", "format": "time_series", - 
"intervalFactor": 2, - "legendFormat": "scan" + "instant": false, + "legendFormat": "scan", + "refId": "Calls in scan queue" }, { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "sum by(job, hbase_cluster) (region_server_num_calls_in_priority_queue{job=~\"$job\", hbase_cluster=~\"$hbase_cluster\", instance=~\"$instance\"})", + "expr": "sum by(job,hbase_cluster) (region_server_num_calls_in_priority_queue{job=\"integrations/apache-hbase\",job=~\"$job\",hbase_cluster=~\"$hbase_cluster\",instance=~\"$instance\"})", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "priority" + "instant": false, + "legendFormat": "priority", + "refId": "Calls in priority queue" } ], "title": "Queued calls", @@ -1115,132 +815,105 @@ }, { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, "description": "The rate of operations that are slow, as determined by HBase.", "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", "fillOpacity": 30, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, + "gradientMode": "opacity", "lineInterpolation": "smooth", "lineWidth": 2, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, "showPoints": "never", "spanNulls": false, "stacking": { - "group": "A", "mode": "normal" - }, - "thresholdsStyle": { - "mode": "off" } }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, "unit": "ops" - }, - "overrides": [ ] + } }, "gridPos": { "h": 8, "w": 12, "x": 12, - "y": 32 + "y": 33 }, "id": 14, "options": { "legend": { + "asTable": true, "calcs": [ "min", "mean", "max" ], 
"displayMode": "table", - "placement": "right", - "showLegend": true + "placement": "right" }, "tooltip": { "mode": "multi", "sort": "desc" } }, + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "sum by(job, hbase_cluster) (rate(server_slow_append_count{job=~\"$job\", hbase_cluster=~\"$hbase_cluster\", instance=~\"$instance\"}[$__rate_interval]))", + "expr": "sum by(job,hbase_cluster) (rate(server_slow_append_count{job=\"integrations/apache-hbase\",job=~\"$job\",hbase_cluster=~\"$hbase_cluster\",instance=~\"$instance\"}[$__rate_interval]))", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "append" + "instant": false, + "legendFormat": "append", + "refId": "Slow appends" }, { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "sum by(job, hbase_cluster) (rate(server_slow_put_count{job=~\"$job\", hbase_cluster=~\"$hbase_cluster\", instance=~\"$instance\"}[$__rate_interval]))", + "expr": "sum by(job,hbase_cluster) (rate(server_slow_put_count{job=\"integrations/apache-hbase\",job=~\"$job\",hbase_cluster=~\"$hbase_cluster\",instance=~\"$instance\"}[$__rate_interval]))", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "put" + "instant": false, + "legendFormat": "put", + "refId": "Slow puts" }, { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "sum by(job, hbase_cluster) (rate(server_slow_delete_count{job=~\"$job\", hbase_cluster=~\"$hbase_cluster\", instance=~\"$instance\"}[$__rate_interval]))", + "expr": "sum by(job,hbase_cluster) (rate(server_slow_delete_count{job=\"integrations/apache-hbase\",job=~\"$job\",hbase_cluster=~\"$hbase_cluster\",instance=~\"$instance\"}[$__rate_interval]))", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "delete" + "instant": false, + "legendFormat": "delete", + "refId": "Slow deletes" }, { "datasource": { + "type": "prometheus", 
"uid": "${prometheus_datasource}" }, - "expr": "sum by(job, hbase_cluster) (rate(server_slow_get_count{job=~\"$job\", hbase_cluster=~\"$hbase_cluster\", instance=~\"$instance\"}[$__rate_interval]))", + "expr": "sum by(job,hbase_cluster) (rate(server_slow_get_count{job=\"integrations/apache-hbase\",job=~\"$job\",hbase_cluster=~\"$hbase_cluster\",instance=~\"$instance\"}[$__rate_interval]))", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "get" + "instant": false, + "legendFormat": "get", + "refId": "Slow gets" }, { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "sum by(job, hbase_cluster) (rate(server_slow_increment_count{job=~\"$job\", hbase_cluster=~\"$hbase_cluster\", instance=~\"$instance\"}[$__rate_interval]))", + "expr": "sum by(job,hbase_cluster) (rate(server_slow_increment_count{job=\"integrations/apache-hbase\",job=~\"$job\",hbase_cluster=~\"$hbase_cluster\",instance=~\"$instance\"}[$__rate_interval]))", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "increment" + "instant": false, + "legendFormat": "increment", + "refId": "Slow increments" } ], "title": "Slow operations", @@ -1248,94 +921,53 @@ }, { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, "description": "The percent of time that requests hit the cache.", "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", "fillOpacity": 30, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, + "gradientMode": "opacity", "lineInterpolation": "smooth", "lineWidth": 2, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - 
"thresholdsStyle": { - "mode": "off" - } + "showPoints": "never" }, - "mappings": [ ], "max": 100, "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, "unit": "percent" - }, - "overrides": [ ] + } }, "gridPos": { "h": 8, "w": 12, "x": 0, - "y": 40 + "y": 41 }, "id": 15, "options": { "legend": { "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true + "displayMode": "list" }, "tooltip": { "mode": "multi", "sort": "desc" } }, + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "server_block_cache_express_hit_percent{job=~\"$job\", hbase_cluster=~\"$hbase_cluster\", instance=~\"$instance\"}", + "expr": "server_block_cache_express_hit_percent{job=\"integrations/apache-hbase\",job=~\"$job\",hbase_cluster=~\"$hbase_cluster\",instance=~\"$instance\"}", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}}" + "instant": false, + "legendFormat": "{{instance}}", + "refId": "Block cache hit percentage" } ], "title": "Cache hit percentage", @@ -1343,105 +975,64 @@ }, { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, "description": "The rate of successful and unsuccessful authentications.", "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", "fillOpacity": 30, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, + "gradientMode": "opacity", "lineInterpolation": "smooth", "lineWidth": 2, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - 
"mode": "normal" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "showPoints": "never" }, "unit": "reqps" - }, - "overrides": [ ] + } }, "gridPos": { "h": 8, "w": 12, "x": 12, - "y": 40 + "y": 41 }, "id": 16, "options": { "legend": { - "calcs": [ - "min", - "mean", - "max" - ], + "asTable": true, + "calcs": [ ], "displayMode": "table", - "placement": "right", - "showLegend": true + "placement": "right" }, "tooltip": { "mode": "multi", "sort": "desc" } }, + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "sum by(job, hbase_cluster) (rate(region_server_authentication_successes{job=~\"$job\", hbase_cluster=~\"$hbase_cluster\", instance=~\"$instance\"}[$__rate_interval]))", + "expr": "sum by(job,hbase_cluster) (rate(region_server_authentication_successes{job=\"integrations/apache-hbase\",job=~\"$job\",hbase_cluster=~\"$hbase_cluster\",instance=~\"$instance\"}[$__rate_interval]))", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "success" + "instant": false, + "legendFormat": "success", + "refId": "RegionServer authentication successes" }, { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "sum by(job, hbase_cluster) (rate(region_server_authentication_failures{job=~\"$job\", hbase_cluster=~\"$hbase_cluster\", instance=~\"$instance\"}[$__rate_interval]))", + "expr": "sum by(job,hbase_cluster) (rate(region_server_authentication_failures{job=\"integrations/apache-hbase\",job=~\"$job\",hbase_cluster=~\"$hbase_cluster\",instance=~\"$instance\"}[$__rate_interval]))", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "failure" + "instant": false, + "legendFormat": "failure", + "refId": "RegionServer authentication failures" } ], "title": "Authentications", @@ 
-1449,90 +1040,71 @@ } ], "refresh": "1m", - "rows": [ ], - "schemaVersion": 14, - "style": "dark", + "schemaVersion": 39, "tags": [ "apache-hbase-mixin" ], "templating": { "list": [ { - "current": { }, - "hide": 0, - "label": "Data Source", + "label": "Prometheus data source", "name": "prometheus_datasource", - "options": [ ], "query": "prometheus", - "refresh": 1, "regex": "", "type": "datasource" }, { - "allValue": "", - "current": { }, + "allValue": ".+", "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "hide": 0, "includeAll": true, "label": "Job", "multi": true, "name": "job", - "options": [ ], - "query": "label_values(master_num_open_connections,job)", + "query": "label_values(server_num_region_servers{job=\"integrations/apache-hbase\"}, job)", "refresh": 2, - "regex": "", - "sort": 0, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false + "sort": 1, + "type": "query" }, { - "allValue": "", - "current": { }, + "allValue": ".+", "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "hide": 0, "includeAll": true, "label": "Apache HBase cluster", "multi": true, "name": "hbase_cluster", - "options": [ ], - "query": "label_values(master_num_open_connections{job=~\"$job\"},hbase_cluster)", + "query": "label_values(server_num_region_servers{job=\"integrations/apache-hbase\",job=~\"$job\"}, hbase_cluster)", "refresh": 2, - "regex": "", - "sort": 0, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false + "sort": 1, + "type": "query" }, { - "allValue": "", - "current": { }, + "allValue": ".+", "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "hide": 0, "includeAll": true, "label": "Instance", "multi": true, "name": "instance", - "options": [ ], - "query": "label_values(server_region_count{job=~\"$job\", hbase_cluster=~\"$hbase_cluster\"},instance)", + "query": 
"label_values(server_num_region_servers{job=\"integrations/apache-hbase\",job=~\"$job\",hbase_cluster=~\"$hbase_cluster\"}, instance)", "refresh": 2, + "sort": 1, + "type": "query" + }, + { + "hide": 2, + "label": "Loki data source", + "name": "loki_datasource", + "query": "loki", "regex": "", - "sort": 0, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false + "type": "datasource" } ] }, @@ -1540,33 +1112,7 @@ "from": "now-30m", "to": "now" }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, "timezone": "default", "title": "Apache HBase RegionServer overview", - "uid": "apache-hbase-regionserver-overview", - "version": 0 + "uid": "apachehbase-regionserver-overview" } \ No newline at end of file diff --git a/apache-hbase-mixin/g.libsonnet b/apache-hbase-mixin/g.libsonnet new file mode 100644 index 000000000..e6a2060ee --- /dev/null +++ b/apache-hbase-mixin/g.libsonnet @@ -0,0 +1 @@ +import 'github.com/grafana/grafonnet/gen/grafonnet-v11.4.0/main.libsonnet' diff --git a/apache-hbase-mixin/jsonnetfile.json b/apache-hbase-mixin/jsonnetfile.json index e8255b65e..152235cc2 100644 --- a/apache-hbase-mixin/jsonnetfile.json +++ b/apache-hbase-mixin/jsonnetfile.json @@ -1,11 +1,20 @@ { "version": 1, "dependencies": [ + { + "source": { + "git": { + "remote": "https://github.com/grafana/jsonnet-libs.git", + "subdir": "common-lib" + } + }, + "version": "master" + }, { "source": { "git": { "remote": "https://github.com/grafana/grafonnet.git", - "subdir": "gen/grafonnet-latest" + "subdir": "gen/grafonnet-v11.4.0" } }, "version": "main" @@ -13,8 +22,8 @@ { "source": { "git": { - "remote": "https://github.com/grafana/grafonnet-lib.git", - "subdir": "grafonnet" + "remote": "https://github.com/grafana/jsonnet-libs.git", + "subdir": 
"grafana-cloud-integration-utils" } }, "version": "master" @@ -30,4 +39,4 @@ } ], "legacyImports": true -} +} \ No newline at end of file diff --git a/apache-hbase-mixin/links.libsonnet b/apache-hbase-mixin/links.libsonnet new file mode 100644 index 000000000..e0ba3b37c --- /dev/null +++ b/apache-hbase-mixin/links.libsonnet @@ -0,0 +1,28 @@ +local g = import './g.libsonnet'; +{ + local link = g.dashboard.link, + new(this): + { + clusterOverview: + link.link.new(this.config.dashboardNamePrefix + ' cluster overview', '/d/' + this.grafana.dashboards['apache-hbase-cluster-overview.json'].uid) + + link.link.options.withKeepTime(true), + + regionServerOverview: + link.link.new(this.config.dashboardNamePrefix + ' RegionServer overview', '/d/' + this.grafana.dashboards['apache-hbase-regionserver-overview.json'].uid) + + link.link.options.withKeepTime(true), + + otherDashboards: + link.dashboards.new('All ' + this.config.dashboardNamePrefix + ' dashboards', this.config.dashboardTags) + + link.dashboards.options.withIncludeVars(true) + + link.dashboards.options.withKeepTime(true) + + link.dashboards.options.withAsDropdown(true), + } + + + if this.config.enableLokiLogs then + { + logs: + link.link.new(this.config.dashboardNamePrefix + ' logs', '/d/' + this.grafana.dashboards['apache-hbase-logs.json'].uid) + + link.link.options.withKeepTime(true), + } + else {}, +} diff --git a/apache-hbase-mixin/main.libsonnet b/apache-hbase-mixin/main.libsonnet new file mode 100644 index 000000000..d541b6980 --- /dev/null +++ b/apache-hbase-mixin/main.libsonnet @@ -0,0 +1,48 @@ +local alerts = import './alerts.libsonnet'; +local config = import './config.libsonnet'; +local dashboards = import './dashboards.libsonnet'; +local links = import './links.libsonnet'; +local panels = import './panels.libsonnet'; +local rows = import './rows.libsonnet'; +local commonlib = import 'common-lib/common/main.libsonnet'; + +{ + withConfigMixin(config): { + config+: config, + }, + + new(): { + local this = 
self, + config: config, + + signals: + { + [sig]: commonlib.signals.unmarshallJsonMulti( + this.config.signals[sig], + type=this.config.metricsSource + ) + for sig in std.objectFields(this.config.signals) + }, + + grafana: { + variables: commonlib.variables.new( + filteringSelector=this.config.filteringSelector, + groupLabels=this.config.groupLabels, + instanceLabels=this.config.instanceLabels, + varMetric='server_num_region_servers', + customAllValue='.+', + enableLokiLogs=this.config.enableLokiLogs, + ), + annotations: {}, + links: links.new(this), + panels: panels.new(this), + rows: rows.new(this), + dashboards: dashboards.new(this), + }, + + prometheus: { + alerts: alerts.new(this), + recordingRules: {}, + }, + }, +} diff --git a/apache-hbase-mixin/mixin.libsonnet b/apache-hbase-mixin/mixin.libsonnet index 4d987cf31..01fb679ea 100644 --- a/apache-hbase-mixin/mixin.libsonnet +++ b/apache-hbase-mixin/mixin.libsonnet @@ -1,3 +1,32 @@ -(import 'dashboards/dashboards.libsonnet') + -(import 'alerts/alerts.libsonnet') + -(import 'config.libsonnet') +local hbaselib = import './main.libsonnet'; +local config = (import './config.libsonnet'); +local util = import 'grafana-cloud-integration-utils/util.libsonnet'; + +local hbase = hbaselib.new() + + hbaselib.withConfigMixin( + { + filteringSelector: config.filteringSelector, + uid: config.uid, + enableLokiLogs: config.enableLokiLogs, + }, + ); + +local optional_labels = { + cluster+: { + allValue: '.*', + }, + hbase_cluster+: { + label: 'Apache HBase cluster', + }, +}; + +{ + grafanaDashboards+:: { + [fname]: + local dashboard = hbase.grafana.dashboards[fname]; + dashboard + util.patch_variables(dashboard, optional_labels) + for fname in std.objectFields(hbase.grafana.dashboards) + }, + prometheusAlerts+:: hbase.prometheus.alerts, + prometheusRules+:: hbase.prometheus.recordingRules, +} diff --git a/apache-hbase-mixin/panels.libsonnet b/apache-hbase-mixin/panels.libsonnet new file mode 100644 index 000000000..9b44d47ab
--- /dev/null +++ b/apache-hbase-mixin/panels.libsonnet @@ -0,0 +1,466 @@ +local g = import './g.libsonnet'; +local commonlib = import 'common-lib/common/main.libsonnet'; + +{ + new(this):: + { + local signals = this.signals, + + // ========================== + // Cluster Dashboard Panels + // ========================== + + masterStatusHistoryPanel: + g.panel.statusHistory.new('Master status history') + + g.panel.statusHistory.panelOptions.withDescription('Displays the current active and backup masters.') + + g.panel.statusHistory.queryOptions.withTargets([ + signals.cluster.masterStatusHistoryNumRegionServers.asTarget(), + signals.cluster.nonMasterStatusHistoryNumRegionServers.asTarget(), + ]) + + g.panel.statusHistory.standardOptions.withMappings([ + { + options: { + '0': { + color: 'yellow', + index: 1, + text: 'Backup', + }, + '1': { + color: 'blue', + index: 0, + text: 'Active', + }, + }, + type: 'value', + }, + ]) + + g.panel.statusHistory.fieldConfig.defaults.custom.withLineWidth(1) + + g.panel.statusHistory.options.withShowValue('never') + + g.panel.statusHistory.options.legend.withShowLegend(true) + + g.panel.statusHistory.options.legend.withDisplayMode('list') + + g.panel.statusHistory.options.legend.withPlacement('bottom') + + g.panel.statusHistory.queryOptions.withMaxDataPoints(100) + + g.panel.statusHistory.standardOptions.thresholds.withSteps([]), + + liveRegionServersPanel: + commonlib.panels.generic.stat.info.new( + 'Live RegionServers', + targets=[signals.cluster.liveRegionServers.asTarget()], + description='Number of RegionServers that are currently live.' + ) + + g.panel.stat.standardOptions.withUnit('short'), + + deadRegionServersPanel: + commonlib.panels.generic.stat.info.new( + 'Dead RegionServers', + targets=[signals.cluster.deadRegionServers.asTarget()], + description='Number of RegionServers that are currently dead.' 
+ ) + + g.panel.stat.standardOptions.withUnit('short') + + g.panel.stat.standardOptions.thresholds.withSteps([ + { color: 'green', value: null }, + { color: 'red', value: 1 }, + ]), + + serversPanel: + g.panel.table.new('Servers') + + g.panel.table.panelOptions.withDescription('Servers for a cluster.') + + g.panel.table.queryOptions.withTargets([ + signals.cluster.serverList.asTableTarget(), + signals.cluster.regionServerList.asTableTarget(), + ]) + + g.panel.table.queryOptions.withTransformations([ + { id: 'merge', options: {} }, + { + id: 'organize', + options: { + excludeByName: { + Time: true, + 'Time 1': true, + 'Time 2': true, + Value: true, + 'Value #A': true, + 'Value #B': true, + __name__: true, + '__name__ 1': true, + '__name__ 2': true, + clusterid: true, + 'clusterid 1': true, + 'clusterid 2': true, + context: true, + 'context 1': true, + 'context 2': true, + hbase_cluster: false, + 'hbase_cluster 1': true, + 'hbase_cluster 2': true, + instance: true, + 'instance 1': false, + 'instance 2': true, + isactivemaster: false, + 'isactivemaster 1': false, + 'isactivemaster 2': true, + job: true, + 'job 1': true, + 'job 2': true, + liveregionservers: true, + 'liveregionservers 1': true, + 'liveregionservers 2': true, + servername: false, + 'servername 1': false, + 'servername 2': true, + zookeeperquorum: true, + 'zookeeperquorum 1': true, + 'zookeeperquorum 2': true, + }, + indexByName: { + Time: 5, + 'Value #A': 12, + 'Value #B': 13, + __name__: 6, + clusterid: 7, + context: 8, + hbase_cluster: 4, + hostname: 1, + instance: 2, + isactivemaster: 3, + job: 9, + liveregionservers: 10, + servername: 0, + zookeeperquorum: 11, + }, + renameByName: { + Time: '', + deadregionservers: 'Dead server', + hbase_cluster: 'Cluster', + hostname: 'Hostname', + instance: 'Instance', + 'instance 1': '', + isactivemaster: 'Role', + 'isactivemaster 1': 'Master', + master_instance: 'Master', + region_server_instance: 'RegionServer', + servername: 'Servername', + 'servername 1': 
'Servername', + }, + }, + }, + ]) + + g.panel.table.standardOptions.withOverrides([ + g.panel.table.fieldOverride.byName.new('RegionServer') + + g.panel.table.fieldOverride.byName.withPropertiesFromOptions( + g.panel.table.standardOptions.withLinks([ + { + title: '', + url: '/d/apachehbase-regionserver-overview?from=${__from}&to=${__to}&var-instance=${__data.fields["RegionServer"]}', + }, + ]) + ), + g.panel.table.fieldOverride.byName.new('Role') + + g.panel.table.fieldOverride.byName.withProperty('noValue', 'RegionServer') + + g.panel.table.fieldOverride.byName.withProperty('mappings', [ + { + options: { + 'false': { index: 1, text: 'backup master' }, + 'true': { color: 'text', index: 0, text: 'active master' }, + }, + type: 'value', + }, + ]), + ]), + + alertsPanel: + { + type: 'alertlist', + title: 'Alerts', + description: 'Panel to report on the status of integration alerts.', + targets: [ + signals.cluster.masterConnections.asTarget(), + ], + options: { + alertInstanceLabelFilter: '{job=~"${job:regex}", hbase_cluster=~"${hbase_cluster:regex}"}', + dashboardAlerts: false, + groupBy: [], + groupMode: 'default', + maxItems: 20, + sortOrder: 1, + stateFilter: { + firing: true, + pending: true, + noData: true, + normal: true, + 'error': true, + }, + }, + }, + + jvmHeapMemoryUsagePanel: + commonlib.panels.memory.timeSeries.usagePercent.new( + 'JVM heap memory usage', + targets=[signals.cluster.masterJvmHeapMemoryUsage.asTarget()], + description='Heap memory usage for the JVM.' 
+ ) + + g.panel.timeSeries.standardOptions.withUnit('percentunit') + + g.panel.timeSeries.standardOptions.withMin(0) + + g.panel.timeSeries.standardOptions.withMax(1) + + g.panel.timeSeries.standardOptions.color.withMode('continuous-BlYlRd'), + + connectionsPanel: + commonlib.panels.network.timeSeries.base.new( + 'Connections', + targets=[ + signals.cluster.masterConnections.asTarget(), + signals.cluster.regionServerConnections.asTarget(), + ], + description='Number of open connections to the cluster.' + ) + + g.panel.timeSeries.standardOptions.withUnit('short') + + g.panel.timeSeries.standardOptions.withDecimals(0) + + g.panel.timeSeries.fieldConfig.defaults.custom.stacking.withMode('normal'), + + + authenticationsPanel: + commonlib.panels.generic.timeSeries.base.new( + 'Authentications', + targets=[ + signals.cluster.masterAuthenticationSuccess.asTarget(), + signals.cluster.masterAuthenticationFailure.asTarget(), + signals.cluster.regionServerAuthenticationSuccess.asTarget(), + signals.cluster.regionServerAuthenticationFailure.asTarget(), + ], + description='Volume of successful and unsuccessful authentications.' + ) + + g.panel.timeSeries.standardOptions.withUnit('reqps') + + g.panel.timeSeries.options.legend.withAsTable(true) + + g.panel.timeSeries.options.legend.withDisplayMode('table') + + g.panel.timeSeries.options.legend.withPlacement('right') + + g.panel.timeSeries.options.legend.withCalcs(['min', 'mean', 'max']), + + masterQueueSizePanel: + commonlib.panels.generic.timeSeries.base.new( + 'Master queue size', + targets=[signals.cluster.masterQueueSize.asTarget()], + description='The size of the queue of requests, operations, and tasks to be processed by the master.' 
+ ) + + g.panel.timeSeries.standardOptions.withUnit('decbytes'), + + masterQueuedCallsPanel: + commonlib.panels.generic.timeSeries.base.new( + 'Master queued calls', + targets=[ + signals.cluster.masterCallsInGeneralQueue.asTarget(), + signals.cluster.masterCallsInReplicationQueue.asTarget(), + signals.cluster.masterCallsInReadQueue.asTarget(), + signals.cluster.masterCallsInWriteQueue.asTarget(), + signals.cluster.masterCallsInScanQueue.asTarget(), + signals.cluster.masterCallsInPriorityQueue.asTarget(), + ], + description='The number of calls waiting to be processed by the master.' + ) + + g.panel.timeSeries.standardOptions.withUnit('short') + + g.panel.timeSeries.options.legend.withAsTable(true) + + g.panel.timeSeries.options.legend.withDisplayMode('table') + + g.panel.timeSeries.options.legend.withPlacement('right') + + g.panel.timeSeries.options.legend.withCalcs(['min', 'mean', 'max']), + + regionsInTransitionPanel: + commonlib.panels.generic.timeSeries.base.new( + 'Regions in transition', + targets=[ + signals.cluster.regionsInTransition.asTarget(), + signals.cluster.oldRegionsInTransition.asTarget(), + ], + description='The number of regions in transition for the cluster.' + ) + + g.panel.timeSeries.standardOptions.withUnit('short'), + + + oldestRegionInTransitionPanel: + commonlib.panels.generic.timeSeries.base.new( + 'Oldest region in transition', + targets=[signals.cluster.oldestRegionInTransitionAge.asTarget()], + description='The age of the longest region in transition for the master of the cluster.' + ) + + g.panel.timeSeries.standardOptions.withUnit('ms'), + + + // ========================== + // RegionServer Dashboard Panels + // ========================== + + regionsPanel: + commonlib.panels.generic.stat.info.new( + 'Regions', + targets=[signals.regionserver.regionCount.asTarget()], + description='The number of regions hosted by the RegionServer.' 
+ ) + + g.panel.stat.standardOptions.withUnit('short'), + + storeFilesPanel: + commonlib.panels.generic.stat.info.new( + 'Store files', + targets=[signals.regionserver.storeFileCount.asTarget()], + description='The number of store files on disk currently managed by the RegionServer.' + ) + + g.panel.stat.standardOptions.withUnit('short'), + + storeFileSizePanel: + commonlib.panels.generic.stat.info.new( + 'Store file size', + targets=[signals.regionserver.storeFileSize.asTarget()], + description='The total size of the store files on disk managed by the RegionServer.' + ) + + g.panel.stat.standardOptions.withUnit('decbytes'), + + + rpcConnectionsPanel: + commonlib.panels.generic.stat.info.new( + 'RPC connections', + targets=[signals.regionserver.rpcConnections.asTarget()], + description='The number of open connections to the RegionServer.' + ) + + g.panel.stat.standardOptions.withUnit('short'), + + regionServerJvmHeapMemoryUsagePanel: + commonlib.panels.generic.timeSeries.base.new( + 'JVM heap memory usage', + targets=[signals.regionserver.jvmHeapMemoryUsage.asTarget()], + description='Heap memory usage for the JVM.' + ) + + g.panel.timeSeries.standardOptions.withUnit('percentunit') + + g.panel.timeSeries.standardOptions.withMin(0) + + g.panel.timeSeries.standardOptions.withMax(1), + + requestsReceivedPanel: + commonlib.panels.requests.timeSeries.rate.new( + 'Requests received', + targets=[signals.regionserver.totalRequestRate.asTarget()], + description='The rate of requests received by the RegionServer.' 
+ ) + + g.panel.timeSeries.standardOptions.withUnit('reqps'), + + requestsOverviewPanel: + g.panel.pieChart.new('Requests overview') + + g.panel.pieChart.panelOptions.withDescription('Requests received by the RegionServer, broken down by type.') + + g.panel.pieChart.queryOptions.withTargets([ + signals.regionserver.readRequestRate.asTarget(), + signals.regionserver.writeRequestRate.asTarget(), + signals.regionserver.cpRequestRate.asTarget(), + signals.regionserver.filteredReadRequestRate.asTarget(), + signals.regionserver.rpcGetRequestRate.asTarget(), + signals.regionserver.rpcScanRequestRate.asTarget(), + signals.regionserver.rpcFullScanRequestRate.asTarget(), + signals.regionserver.rpcMutateRequestRate.asTarget(), + signals.regionserver.rpcMultiRequestRate.asTarget(), + ]) + + g.panel.pieChart.standardOptions.withUnit('reqps') + + g.panel.pieChart.options.legend.withDisplayMode('list') + + g.panel.pieChart.options.legend.withPlacement('right') + + g.panel.pieChart.options.legend.withShowLegend(true) + + g.panel.pieChart.options.withPieType('pie') + + g.panel.pieChart.options.reduceOptions.withCalcs(['lastNotNull']) + + g.panel.pieChart.options.tooltip.withMode('multi'), + + regionCountPanel: + commonlib.panels.generic.timeSeries.base.new( + 'Region count', + targets=[signals.regionserver.regionCount.asTarget()], + description='The number of regions hosted by the RegionServer.' + ) + + g.panel.timeSeries.standardOptions.withUnit('short'), + + + rpcConnectionCountPanel: + commonlib.panels.network.timeSeries.base.new( + 'RPC connection count', + targets=[signals.regionserver.rpcConnections.asTarget()], + description='The number of open connections to the RegionServer.' 
+ ) + + g.panel.timeSeries.standardOptions.withUnit('short') + + g.panel.timeSeries.fieldConfig.defaults.custom.stacking.withMode('normal'), + + storeFileCountPanel: + commonlib.panels.generic.timeSeries.base.new( + 'Store file count', + targets=[signals.regionserver.storeFileCount.asTarget()], + description='The number of store files on disk currently managed by the RegionServer.' + ) + + g.panel.timeSeries.standardOptions.withUnit('short'), + + storeFileSizeTimeseriesPanel: + commonlib.panels.generic.timeSeries.base.new( + 'Store file size', + targets=[signals.regionserver.storeFileSize.asTarget()], + description='The total size of the store files on disk managed by the RegionServer.' + ) + + g.panel.timeSeries.standardOptions.withUnit('decbytes') + + + g.panel.timeSeries.fieldConfig.defaults.custom.withLineInterpolation('smooth') + + g.panel.timeSeries.fieldConfig.defaults.custom.withLineWidth(2) + + g.panel.timeSeries.fieldConfig.defaults.custom.withSpanNulls(false), + + queuedCallsPanel: + commonlib.panels.generic.timeSeries.base.new( + 'Queued calls', + targets=[ + signals.regionserver.callsInGeneralQueue.asTarget(), + signals.regionserver.callsInReplicationQueue.asTarget(), + signals.regionserver.callsInReadQueue.asTarget(), + signals.regionserver.callsInWriteQueue.asTarget(), + signals.regionserver.callsInScanQueue.asTarget(), + signals.regionserver.callsInPriorityQueue.asTarget(), + ], + description='The number of calls waiting to be processed by the RegionServer.' 
+ ) + + g.panel.timeSeries.standardOptions.withUnit('short') + + g.panel.timeSeries.options.legend.withAsTable(true) + + g.panel.timeSeries.options.legend.withDisplayMode('table') + + g.panel.timeSeries.options.legend.withPlacement('right') + + g.panel.timeSeries.options.legend.withCalcs(['min', 'mean', 'max']), + + slowOperationsPanel: + commonlib.panels.generic.timeSeries.base.new( + 'Slow operations', + targets=[ + signals.regionserver.slowAppendRate.asTarget(), + signals.regionserver.slowPutRate.asTarget(), + signals.regionserver.slowDeleteRate.asTarget(), + signals.regionserver.slowGetRate.asTarget(), + signals.regionserver.slowIncrementRate.asTarget(), + ], + description='The rate of operations that are slow, as determined by HBase.' + ) + + g.panel.timeSeries.standardOptions.withUnit('ops') + + g.panel.timeSeries.fieldConfig.defaults.custom.withLineInterpolation('smooth') + + g.panel.timeSeries.fieldConfig.defaults.custom.withLineWidth(2) + + g.panel.timeSeries.fieldConfig.defaults.custom.withSpanNulls(false) + + g.panel.timeSeries.fieldConfig.defaults.custom.stacking.withMode('normal') + + g.panel.timeSeries.options.legend.withAsTable(true) + + g.panel.timeSeries.options.legend.withDisplayMode('table') + + g.panel.timeSeries.options.legend.withPlacement('right') + + g.panel.timeSeries.options.legend.withCalcs(['min', 'mean', 'max']), + + cacheHitPercentagePanel: + commonlib.panels.generic.timeSeries.base.new( + 'Cache hit percentage', + targets=[signals.regionserver.blockCacheHitPercent.asTarget()], + description='The percent of time that requests hit the cache.' 
+ ) + + g.panel.timeSeries.standardOptions.withUnit('percent') + + g.panel.timeSeries.standardOptions.withMin(0) + + g.panel.timeSeries.standardOptions.withMax(100), + + regionServerAuthenticationsPanel: + commonlib.panels.generic.timeSeries.base.new( + 'Authentications', + targets=[ + signals.regionserver.regionServerAuthenticationSuccess.asTarget(), + signals.regionserver.regionServerAuthenticationFailure.asTarget(), + ], + description='The rate of successful and unsuccessful authentications.' + ) + + g.panel.timeSeries.standardOptions.withUnit('reqps') + + g.panel.timeSeries.options.legend.withAsTable(true) + + g.panel.timeSeries.options.legend.withDisplayMode('table') + + g.panel.timeSeries.options.legend.withPlacement('right'), + }, +} diff --git a/apache-hbase-mixin/rows.libsonnet b/apache-hbase-mixin/rows.libsonnet new file mode 100644 index 000000000..2d89c0554 --- /dev/null +++ b/apache-hbase-mixin/rows.libsonnet @@ -0,0 +1,55 @@ +local g = import './g.libsonnet'; + +{ + new(this): + { + local panels = this.grafana.panels, + + // ========================== + // Cluster Overview Dashboard + // ========================== + + clusterOverview: + g.panel.row.new('Cluster overview') + + g.panel.row.withCollapsed(false) + + g.panel.row.withPanels([ + panels.masterStatusHistoryPanel { gridPos+: { w: 24 } }, + panels.liveRegionServersPanel { gridPos+: { w: 5 } }, + panels.deadRegionServersPanel { gridPos+: { w: 5 } }, + panels.serversPanel { gridPos+: { w: 14 } }, + panels.alertsPanel { gridPos+: { w: 12 } }, + panels.jvmHeapMemoryUsagePanel { gridPos+: { w: 12 } }, + panels.connectionsPanel { gridPos+: { w: 12 } }, + panels.authenticationsPanel { gridPos+: { w: 12 } }, + panels.masterQueueSizePanel { gridPos+: { w: 12 } }, + panels.masterQueuedCallsPanel { gridPos+: { w: 12 } }, + panels.regionsInTransitionPanel { gridPos+: { w: 12 } }, + panels.oldestRegionInTransitionPanel { gridPos+: { w: 12 } }, + ]), + + // ========================== + // RegionServer 
Overview Dashboard + // ========================== + + regionServerOverview: + g.panel.row.new('RegionServer overview') + + g.panel.row.withCollapsed(false) + + g.panel.row.withPanels([ + panels.regionsPanel { gridPos+: { w: 3 } }, + panels.storeFilesPanel { gridPos+: { w: 3 } }, + panels.storeFileSizePanel { gridPos+: { w: 3 } }, + panels.rpcConnectionsPanel { gridPos+: { w: 3 } }, + panels.regionServerJvmHeapMemoryUsagePanel { gridPos+: { w: 12 } }, + panels.requestsReceivedPanel { gridPos+: { w: 16 } }, + panels.requestsOverviewPanel { gridPos+: { w: 8 } }, + panels.regionCountPanel { gridPos+: { w: 12 } }, + panels.rpcConnectionCountPanel { gridPos+: { w: 12 } }, + panels.storeFileCountPanel { gridPos+: { w: 12 } }, + panels.storeFileSizeTimeseriesPanel { gridPos+: { w: 12 } }, + panels.queuedCallsPanel { gridPos+: { w: 12 } }, + panels.slowOperationsPanel { gridPos+: { w: 12 } }, + panels.cacheHitPercentagePanel { gridPos+: { w: 12 } }, + panels.regionServerAuthenticationsPanel { gridPos+: { w: 12 } }, + ]), + }, +} diff --git a/apache-hbase-mixin/signals/cluster.libsonnet b/apache-hbase-mixin/signals/cluster.libsonnet new file mode 100644 index 000000000..d6ba8ae0e --- /dev/null +++ b/apache-hbase-mixin/signals/cluster.libsonnet @@ -0,0 +1,316 @@ +local commonlib = import 'common-lib/common/main.libsonnet'; + +function(this) + local groupAggList = std.join(',', this.groupLabels); + local legendCustomTemplate = '{{hbase_cluster}}'; + local groupAggListWithInstance = groupAggList + ', ' + std.join(',', this.instanceLabels); + { + filteringSelector: this.filteringSelector, + groupLabels: this.groupLabels, + instanceLabels: this.instanceLabels, + enableLokiLogs: this.enableLokiLogs, + aggLevel: 'none', + aggFunction: 'avg', + legendCustomTemplate: legendCustomTemplate, + discoveryMetric: { + prometheus: 'server_num_region_servers', + }, + signals: { + masterStatusHistoryNumRegionServers: { + name: 'Master status history number of region servers', + type: 'gauge', 
+ description: 'The number of region servers for the master status history.', + unit: 'short', + sources: { + prometheus: { + expr: 'max without (clusterid,deadregionservers,liveregionservers,servername,zookeeperquorum,isactivemaster) (server_num_region_servers{%(queriesSelector)s, isactivemaster="true"} * 0 + 1 )', + }, + }, + }, + + nonMasterStatusHistoryNumRegionServers: { + name: 'Non-master status history number of region servers', + type: 'gauge', + description: 'The number of region servers for the non-master status history.', + unit: 'short', + sources: { + prometheus: { + expr: '(max without (clusterid,deadregionservers,liveregionservers,servername,zookeeperquorum,isactivemaster) (server_num_region_servers{%(queriesSelector)s, isactivemaster="false"}) * 0)', + }, + }, + }, + + // Master and cluster status + liveRegionServers: { + name: 'Live RegionServers', + type: 'gauge', + description: 'Number of RegionServers that are currently live.', + unit: 'short', + sources: { + prometheus: { + expr: 'server_num_region_servers{%(queriesSelector)s, isactivemaster="true"}', + }, + }, + }, + + deadRegionServers: { + name: 'Dead RegionServers', + type: 'gauge', + description: 'Number of RegionServers that are currently dead.', + unit: 'short', + sources: { + prometheus: { + expr: 'server_num_dead_region_servers{%(queriesSelector)s, isactivemaster="true"}', + }, + }, + }, + + // Connections + masterConnections: { + name: 'Master connections', + type: 'gauge', + description: 'Number of open connections to the master.', + unit: 'short', + sources: { + prometheus: { + expr: 'sum by(' + groupAggList + ') (master_num_open_connections{%(queriesSelector)s})', + legendCustomTemplate: legendCustomTemplate + ' - masters', + }, + }, + }, + + regionServerConnections: { + name: 'RegionServer connections', + type: 'gauge', + description: 'Number of open connections to RegionServers.', + unit: 'short', + sources: { + prometheus: { + expr: 'sum by(' + groupAggList + ') 
(region_server_num_open_connections{%(queriesSelector)s})', + legendCustomTemplate: legendCustomTemplate + ' - RegionServers', + }, + }, + }, + + // Authentication + masterAuthenticationSuccess: { + name: 'Master authentication successes', + type: 'raw', + description: 'Rate of successful authentications to the master.', + unit: 'reqps', + sources: { + prometheus: { + expr: 'sum by(' + groupAggList + ') (master_authentication_successes{%(queriesSelector)s})', + legendCustomTemplate: legendCustomTemplate + ' - masters success', + }, + }, + }, + + masterAuthenticationFailure: { + name: 'Master authentication failures', + type: 'raw', + description: 'Rate of failed authentications to the master.', + unit: 'reqps', + sources: { + prometheus: { + expr: 'sum by(' + groupAggList + ') (master_authentication_failures{%(queriesSelector)s})', + legendCustomTemplate: legendCustomTemplate + ' - masters failure', + }, + }, + }, + + regionServerAuthenticationSuccess: { + name: 'RegionServer authentication successes', + type: 'raw', + description: 'Rate of successful authentications to RegionServers.', + unit: 'reqps', + sources: { + prometheus: { + expr: 'sum by(' + groupAggList + ') (region_server_authentication_successes{%(queriesSelector)s})', + legendCustomTemplate: legendCustomTemplate + ' - rs success', + }, + }, + }, + + regionServerAuthenticationFailure: { + name: 'RegionServer authentication failures', + type: 'raw', + description: 'Rate of failed authentications to RegionServers.', + unit: 'reqps', + sources: { + prometheus: { + expr: 'sum by(' + groupAggList + ') (region_server_authentication_failures{%(queriesSelector)s})', + legendCustomTemplate: legendCustomTemplate + ' - rs failure', + }, + }, + }, + + // Master queue metrics + masterQueueSize: { + name: 'Master queue size', + type: 'gauge', + description: 'The size of the queue of requests, operations, and tasks to be processed by the master.', + unit: 'decbytes', + sources: { + prometheus: { + expr: 
'master_queue_size{%(queriesSelector)s}', + legendCustomTemplate: legendCustomTemplate + ' - {{instance}}', + }, + }, + }, + + masterCallsInGeneralQueue: { + name: 'Master calls in general queue', + type: 'gauge', + description: 'Number of calls waiting in the general queue of the master.', + unit: 'short', + sources: { + prometheus: { + expr: 'sum by(' + groupAggList + ') (master_num_calls_in_general_queue{%(queriesSelector)s})', + legendCustomTemplate: legendCustomTemplate + ' - general', + }, + }, + }, + + serverList: { + name: 'Server live count', + type: 'gauge', + description: 'The number of live servers for the cluster.', + unit: 'short', + sources: { + prometheus: { + expr: 'label_replace(server_num_region_servers{%(queriesSelector)s}, "master_instance", "$1", "instance", "(.+)")', + }, + }, + }, + + regionServerList: { + name: 'Region server list', + type: 'gauge', + description: 'The list of region servers for the cluster.', + unit: 'short', + sources: { + prometheus: { + expr: 'label_replace(server_num_reference_files{%(queriesSelector)s}, "region_server_instance", "$1", "instance", "(.+)")', + }, + }, + }, + + masterCallsInReplicationQueue: { + name: 'Master calls in replication queue', + type: 'gauge', + description: 'Number of calls waiting in the replication queue of the master.', + unit: 'short', + sources: { + prometheus: { + expr: 'sum by(' + groupAggList + ') (master_num_calls_in_replication_queue{%(queriesSelector)s})', + legendCustomTemplate: legendCustomTemplate + ' - replication', + }, + }, + }, + + masterCallsInReadQueue: { + name: 'Master calls in read queue', + type: 'gauge', + description: 'Number of calls waiting in the read queue of the master.', + unit: 'short', + sources: { + prometheus: { + expr: 'sum by(' + groupAggList + ') (master_num_calls_in_read_queue{%(queriesSelector)s})', + legendCustomTemplate: legendCustomTemplate + ' - read', + }, + }, + }, + + masterCallsInWriteQueue: { + name: 'Master calls in write queue', + type: 
'gauge', + description: 'Number of calls waiting in the write queue of the master.', + unit: 'short', + sources: { + prometheus: { + expr: 'sum by(' + groupAggList + ') (master_num_calls_in_write_queue{%(queriesSelector)s})', + legendCustomTemplate: legendCustomTemplate + ' - write', + }, + }, + }, + + masterCallsInScanQueue: { + name: 'Master calls in scan queue', + type: 'gauge', + description: 'Number of calls waiting in the scan queue of the master.', + unit: 'short', + sources: { + prometheus: { + expr: 'sum by(' + groupAggList + ') (master_num_calls_in_scan_queue{%(queriesSelector)s})', + legendCustomTemplate: legendCustomTemplate + ' - scan', + }, + }, + }, + + masterCallsInPriorityQueue: { + name: 'Master calls in priority queue', + type: 'gauge', + description: 'Number of calls waiting in the priority queue of the master.', + unit: 'short', + sources: { + prometheus: { + expr: 'sum by(' + groupAggList + ') (master_num_calls_in_priority_queue{%(queriesSelector)s})', + legendCustomTemplate: legendCustomTemplate + ' - priority', + }, + }, + }, + + // Regions in transition + regionsInTransition: { + name: 'Regions in transition', + type: 'gauge', + description: 'The number of regions in transition for the cluster.', + unit: 'short', + sources: { + prometheus: { + expr: 'assignment_manager_rit_count{%(queriesSelector)s}', + }, + }, + }, + + oldRegionsInTransition: { + name: 'Old regions in transition', + type: 'gauge', + description: 'The number of regions in transition that are over the threshold age.', + unit: 'short', + sources: { + prometheus: { + expr: 'assignment_manager_rit_count_over_threshold{%(queriesSelector)s}', + legendCustomTemplate: legendCustomTemplate + ' - old', + }, + }, + }, + + oldestRegionInTransitionAge: { + name: 'Oldest region in transition age', + type: 'gauge', + description: 'The age of the longest region in transition for the master of the cluster.', + unit: 'ms', + sources: { + prometheus: { + expr: 
'assignment_manager_rit_oldest_age{%(queriesSelector)s}', + }, + }, + }, + + // JVM memory for masters + masterJvmHeapMemoryUsage: { + name: 'Master JVM heap memory usage', + type: 'raw', + description: 'Heap memory usage for the master JVM.', + unit: 'percentunit', + sources: { + prometheus: { + expr: 'jvm_metrics_mem_heap_used_m{%(queriesSelector)s, processname=~"Master"} / clamp_min(jvm_metrics_mem_heap_committed_m{%(queriesSelector)s, processname=~"Master"}, 1)', + legendCustomTemplate: legendCustomTemplate + ' - {{instance}}', + }, + }, + }, + }, + } diff --git a/apache-hbase-mixin/signals/regionserver.libsonnet b/apache-hbase-mixin/signals/regionserver.libsonnet new file mode 100644 index 000000000..b8141f73a --- /dev/null +++ b/apache-hbase-mixin/signals/regionserver.libsonnet @@ -0,0 +1,398 @@ +local commonlib = import 'common-lib/common/main.libsonnet'; + +function(this) + local groupAggList = std.join(',', this.groupLabels); + local groupAggListWithInstance = groupAggList + ', ' + std.join(',', this.instanceLabels); + local legendCustomTemplate = '{{instance}}'; + { + filteringSelector: this.filteringSelector, + groupLabels: this.groupLabels, + instanceLabels: this.instanceLabels, + enableLokiLogs: this.enableLokiLogs, + aggLevel: 'none', + aggFunction: 'avg', + legendCustomTemplate: legendCustomTemplate, + discoveryMetric: { + prometheus: 'server_region_count', + }, + signals: { + // Region and store metrics + regionCount: { + name: 'Region count', + type: 'gauge', + description: 'The number of regions hosted by the RegionServer.', + unit: 'short', + sources: { + prometheus: { + expr: 'server_region_count{%(queriesSelector)s}', + }, + }, + }, + + storeFileCount: { + name: 'Store file count', + type: 'gauge', + description: 'The number of store files on disk currently managed by the RegionServer.', + unit: 'short', + sources: { + prometheus: { + expr: 'server_store_file_count{%(queriesSelector)s}', + }, + }, + }, + + storeFileSize: { + name: 'Store file 
size', + type: 'gauge', + description: 'The total size of the store files on disk managed by the RegionServer.', + unit: 'decbytes', + sources: { + prometheus: { + expr: 'server_store_file_size{%(queriesSelector)s}', + }, + }, + }, + + // Connection metrics + rpcConnections: { + name: 'RPC connections', + type: 'gauge', + description: 'The number of open connections to the RegionServer.', + unit: 'short', + sources: { + prometheus: { + expr: 'region_server_num_open_connections{%(queriesSelector)s}', + }, + }, + }, + + // Request metrics + totalRequestRate: { + name: 'Total requests', + type: 'counter', + description: 'The rate of requests received by the RegionServer.', + unit: 'reqps', + sources: { + prometheus: { + expr: 'server_total_request_count{%(queriesSelector)s}', + }, + }, + }, + + readRequestRate: { + name: 'Read requests', + type: 'raw', + description: 'The rate of read requests received by the RegionServer.', + unit: 'reqps', + sources: { + prometheus: { + expr: 'sum by(' + groupAggList + ') (server_read_request_count{%(queriesSelector)s})', + legendCustomTemplate: 'read', + }, + }, + }, + + writeRequestRate: { + name: 'Write requests', + type: 'raw', + description: 'The rate of write requests received by the RegionServer.', + unit: 'reqps', + sources: { + prometheus: { + expr: 'sum by(' + groupAggList + ') (server_write_request_count{%(queriesSelector)s})', + legendCustomTemplate: 'write', + }, + }, + }, + + cpRequestRate: { + name: 'Copy requests', + type: 'raw', + description: 'The rate of copy requests received by the RegionServer.', + unit: 'reqps', + sources: { + prometheus: { + expr: 'sum by(' + groupAggList + ') (server_cp_request_count{%(queriesSelector)s})', + legendCustomTemplate: 'copy', + }, + }, + }, + + filteredReadRequestRate: { + name: 'Filtered read requests', + type: 'raw', + description: 'The rate of filtered read requests received by the RegionServer.', + unit: 'reqps', + sources: { + prometheus: { + expr: 'sum by(' + groupAggList 
+ ') (server_filtered_read_request_count{%(queriesSelector)s})', + legendCustomTemplate: 'filtered read', + }, + }, + }, + + rpcGetRequestRate: { + name: 'RPC get requests', + type: 'raw', + description: 'The rate of RPC get requests received by the RegionServer.', + unit: 'reqps', + sources: { + prometheus: { + expr: 'sum by(' + groupAggList + ') (server_rpc_get_request_count{%(queriesSelector)s})', + legendCustomTemplate: 'rpc get', + }, + }, + }, + + rpcScanRequestRate: { + name: 'RPC scan requests', + type: 'raw', + description: 'The rate of RPC scan requests received by the RegionServer.', + unit: 'reqps', + sources: { + prometheus: { + expr: 'sum by(' + groupAggList + ') (server_rpc_scan_request_count{%(queriesSelector)s})', + legendCustomTemplate: 'rpc scan', + }, + }, + }, + + rpcFullScanRequestRate: { + name: 'RPC full scan requests', + type: 'raw', + description: 'The rate of RPC full scan requests received by the RegionServer.', + unit: 'reqps', + sources: { + prometheus: { + expr: 'sum by(' + groupAggList + ') (server_rpc_full_scan_request_count{%(queriesSelector)s})', + legendCustomTemplate: 'rpc full scan', + }, + }, + }, + + rpcMutateRequestRate: { + name: 'RPC mutate requests', + type: 'raw', + description: 'The rate of RPC mutate requests received by the RegionServer.', + unit: 'reqps', + sources: { + prometheus: { + expr: 'sum by(' + groupAggList + ') (server_rpc_mutate_request_count{%(queriesSelector)s})', + legendCustomTemplate: 'rpc mutate', + }, + }, + }, + + rpcMultiRequestRate: { + name: 'RPC multi requests', + type: 'raw', + description: 'The rate of RPC multi requests received by the RegionServer.', + unit: 'reqps', + sources: { + prometheus: { + expr: 'sum by(' + groupAggList + ') (server_rpc_multi_request_count{%(queriesSelector)s})', + legendCustomTemplate: 'rpc multi', + }, + }, + }, + + // Queue metrics + callsInGeneralQueue: { + name: 'Calls in general queue', + type: 'gauge', + description: 'Number of calls waiting in the general 
queue of the RegionServer.', + unit: 'short', + sources: { + prometheus: { + expr: 'sum by(' + groupAggList + ') (region_server_num_calls_in_general_queue{%(queriesSelector)s})', + legendCustomTemplate: 'general', + }, + }, + }, + + callsInReplicationQueue: { + name: 'Calls in replication queue', + type: 'gauge', + description: 'Number of calls waiting in the replication queue of the RegionServer.', + unit: 'short', + sources: { + prometheus: { + expr: 'sum by(' + groupAggList + ') (region_server_num_calls_in_replication_queue{%(queriesSelector)s})', + legendCustomTemplate: 'replication', + }, + }, + }, + + callsInReadQueue: { + name: 'Calls in read queue', + type: 'gauge', + description: 'Number of calls waiting in the read queue of the RegionServer.', + unit: 'short', + sources: { + prometheus: { + expr: 'sum by(' + groupAggList + ') (region_server_num_calls_in_read_queue{%(queriesSelector)s})', + legendCustomTemplate: 'read', + }, + }, + }, + + callsInWriteQueue: { + name: 'Calls in write queue', + type: 'gauge', + description: 'Number of calls waiting in the write queue of the RegionServer.', + unit: 'short', + sources: { + prometheus: { + expr: 'sum by(' + groupAggList + ') (region_server_num_calls_in_write_queue{%(queriesSelector)s})', + legendCustomTemplate: 'write', + }, + }, + }, + + callsInScanQueue: { + name: 'Calls in scan queue', + type: 'gauge', + description: 'Number of calls waiting in the scan queue of the RegionServer.', + unit: 'short', + sources: { + prometheus: { + expr: 'sum by(' + groupAggList + ') (region_server_num_calls_in_scan_queue{%(queriesSelector)s})', + legendCustomTemplate: 'scan', + }, + }, + }, + + callsInPriorityQueue: { + name: 'Calls in priority queue', + type: 'gauge', + description: 'Number of calls waiting in the priority queue of the RegionServer.', + unit: 'short', + sources: { + prometheus: { + expr: 'sum by(' + groupAggList + ') (region_server_num_calls_in_priority_queue{%(queriesSelector)s})', + legendCustomTemplate: 
'priority', + }, + }, + }, + + // Slow operations + slowAppendRate: { + name: 'Slow appends', + type: 'raw', + description: 'The rate of slow append operations.', + unit: 'ops', + sources: { + prometheus: { + expr: 'sum by(' + groupAggList + ') (rate(server_slow_append_count{%(queriesSelector)s}[$__rate_interval]))', + legendCustomTemplate: 'append', + }, + }, + }, + + slowPutRate: { + name: 'Slow puts', + type: 'raw', + description: 'The rate of slow put operations.', + unit: 'ops', + sources: { + prometheus: { + expr: 'sum by(' + groupAggList + ') (rate(server_slow_put_count{%(queriesSelector)s}[$__rate_interval]))', + legendCustomTemplate: 'put', + }, + }, + }, + + slowDeleteRate: { + name: 'Slow deletes', + type: 'raw', + description: 'The slow delete operations.', + unit: 'ops', + sources: { + prometheus: { + expr: 'sum by(' + groupAggList + ') (rate(server_slow_delete_count{%(queriesSelector)s}[$__rate_interval]))', + legendCustomTemplate: 'delete', + }, + }, + }, + + slowGetRate: { + name: 'Slow gets', + type: 'raw', + description: 'The rate of slow get operations.', + unit: 'ops', + sources: { + prometheus: { + expr: 'sum by(' + groupAggList + ') (rate(server_slow_get_count{%(queriesSelector)s}[$__rate_interval]))', + legendCustomTemplate: 'get', + }, + }, + }, + + slowIncrementRate: { + name: 'Slow increments', + type: 'raw', + description: 'The rate of slow increment operations.', + unit: 'ops', + sources: { + prometheus: { + expr: 'sum by(' + groupAggList + ') (rate(server_slow_increment_count{%(queriesSelector)s}[$__rate_interval]))', + legendCustomTemplate: 'increment', + }, + }, + }, + + // Cache metrics + blockCacheHitPercent: { + name: 'Block cache hit percentage', + type: 'gauge', + description: 'The percent of time that requests hit the cache.', + unit: 'percent', + sources: { + prometheus: { + expr: 'server_block_cache_express_hit_percent{%(queriesSelector)s}', + legendCustomTemplate: '{{instance}}', + }, + }, + }, + + // JVM memory for 
RegionServers + jvmHeapMemoryUsage: { + name: 'RegionServer JVM heap memory usage', + type: 'raw', + description: 'Heap memory usage for the RegionServer JVM.', + unit: 'percentunit', + sources: { + prometheus: { + expr: 'jvm_metrics_mem_heap_used_m{%(queriesSelector)s, processname="RegionServer"} / clamp_min(jvm_metrics_mem_heap_committed_m{%(queriesSelector)s, processname="RegionServer"}, 1)', + legendCustomTemplate: '{{instance}}', + }, + }, + }, + + regionServerAuthenticationSuccess: { + name: 'RegionServer authentication successes', + type: 'raw', + description: 'The rate of successful authentications to the RegionServer.', + unit: 'reqps', + sources: { + prometheus: { + expr: 'sum by(' + groupAggList + ') (rate(region_server_authentication_successes{%(queriesSelector)s}[$__rate_interval]))', + legendCustomTemplate: 'success', + }, + }, + }, + + regionServerAuthenticationFailure: { + name: 'RegionServer authentication failures', + type: 'raw', + description: 'The rate of failed authentications to the RegionServer.', + unit: 'reqps', + sources: { + prometheus: { + expr: 'sum by(' + groupAggList + ') (rate(region_server_authentication_failures{%(queriesSelector)s}[$__rate_interval]))', + legendCustomTemplate: 'failure', + }, + }, + }, + }, + } From d9f61a3f330437d0d59c9569443824ada0131ce2 Mon Sep 17 00:00:00 2001 From: schmikei Date: Fri, 14 Nov 2025 09:36:21 -0500 Subject: [PATCH 2/4] remove prometheusv2 from config.libsonnet --- apache-hbase-mixin/config.libsonnet | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/apache-hbase-mixin/config.libsonnet b/apache-hbase-mixin/config.libsonnet index 5e6ab839e..95098cb1d 100644 --- a/apache-hbase-mixin/config.libsonnet +++ b/apache-hbase-mixin/config.libsonnet @@ -11,7 +11,7 @@ dashboardPeriod: 'now-30m', dashboardTimezone: 'default', dashboardRefresh: '1m', - metricsSource: ['prometheus', 'prometheusv2'], + metricsSource: ['prometheus'], // Logging configuration enableLokiLogs: true, From 
13c038665a4afb11d1285ff69b03d51ef64b116a Mon Sep 17 00:00:00 2001 From: schmikei Date: Wed, 26 Nov 2025 16:08:12 -0500 Subject: [PATCH 3/4] pr feedback --- apache-hbase-mixin/alerts.libsonnet | 6 +- apache-hbase-mixin/dashboards.libsonnet | 2 +- .../apache-hbase-cluster-overview.json | 10 +- ...s-overview.json => apache-hbase-logs.json} | 2 +- .../apache-hbase-regionserver-overview.json | 40 +++---- apache-hbase-mixin/mixin.libsonnet | 4 +- apache-hbase-mixin/panels.libsonnet | 14 +-- .../prometheus_alerts.yaml | 6 +- apache-hbase-mixin/signals/cluster.libsonnet | 8 +- .../signals/regionserver.libsonnet | 107 +++++++++++------- 10 files changed, 115 insertions(+), 84 deletions(-) rename apache-hbase-mixin/dashboards_out/{apache-hbase-logs-overview.json => apache-hbase-logs.json} (99%) diff --git a/apache-hbase-mixin/alerts.libsonnet b/apache-hbase-mixin/alerts.libsonnet index 3f3557d06..b4b30f9a3 100644 --- a/apache-hbase-mixin/alerts.libsonnet +++ b/apache-hbase-mixin/alerts.libsonnet @@ -41,7 +41,7 @@ { alert: 'HBaseOldRegionsInTransition', expr: ||| - 100 * assignment_manager_rit_count_over_threshold / clamp_min(assignment_manager_rit_count, 1) > %(alertsOldRegionsInTransition)s + 100 * assignment_manager_rit_count_over_threshold / clamp_min(assignment_manager_rit_count{%(filteringSelector)s}, 1) > %(alertsOldRegionsInTransition)s ||| % this.config, 'for': '5m', labels: { @@ -58,7 +58,7 @@ { alert: 'HBaseHighMasterAuthFailRate', expr: ||| - 100 * rate(master_authentication_failures[5m]) / (clamp_min(rate(master_authentication_successes[5m]), 1) + clamp_min(rate(master_authentication_failures[5m]), 1)) > %(alertsHighMasterAuthFailRate)s + 100 * rate(master_authentication_failures[5m]) / (clamp_min(rate(master_authentication_successes{%(filteringSelector)s}[5m]), 1) + clamp_min(rate(master_authentication_failures{%(filteringSelector)s}[5m]), 1)) > %(alertsHighMasterAuthFailRate)s ||| % this.config, 'for': '5m', labels: { @@ -75,7 +75,7 @@ { alert: 
'HBaseHighRSAuthFailRate', expr: ||| - 100 * rate(region_server_authentication_failures[5m]) / (clamp_min(rate(region_server_authentication_successes[5m]), 1) + clamp_min(rate(region_server_authentication_failures[5m]), 1)) > %(alertsHighRSAuthFailRate)s + 100 * rate(region_server_authentication_failures[5m]) / (clamp_min(rate(region_server_authentication_successes{%(filteringSelector)s}[5m]), 1) + clamp_min(rate(region_server_authentication_failures{%(filteringSelector)s}[5m]), 1)) > %(alertsHighRSAuthFailRate)s ||| % this.config, 'for': '5m', labels: { diff --git a/apache-hbase-mixin/dashboards.libsonnet b/apache-hbase-mixin/dashboards.libsonnet index 99d27da6c..68e7c8a9a 100644 --- a/apache-hbase-mixin/dashboards.libsonnet +++ b/apache-hbase-mixin/dashboards.libsonnet @@ -75,7 +75,7 @@ local logslib = import 'logs-lib/logs/main.libsonnet'; dashboards+: { logs+: - root.applyCommon(super.logs.templating.list, uid=uid + '_logs', tags=tags, links=links { logs+:: {} }, annotations=annotations, timezone=timezone, refresh=refresh, period=period), + root.applyCommon(super.logs.templating.list, uid=uid + '-logs', tags=tags, links=links { logs+:: {} }, annotations=annotations, timezone=timezone, refresh=refresh, period=period), }, panels+: { diff --git a/apache-hbase-mixin/dashboards_out/apache-hbase-cluster-overview.json b/apache-hbase-mixin/dashboards_out/apache-hbase-cluster-overview.json index 63836afe4..9fce1e270 100644 --- a/apache-hbase-mixin/dashboards_out/apache-hbase-cluster-overview.json +++ b/apache-hbase-mixin/dashboards_out/apache-hbase-cluster-overview.json @@ -7,7 +7,7 @@ "keepTime": true, "title": "Apache HBase logs", "type": "link", - "url": "/d/apachehbase_logs" + "url": "/d/apachehbase-logs" }, { "asDropdown": true, @@ -611,7 +611,7 @@ "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "sum by(job,hbase_cluster) 
(master_authentication_successes{job=\"integrations/apache-hbase\",job=~\"$job\",hbase_cluster=~\"$hbase_cluster\",instance=~\"$instance\"})", + "expr": "sum by(job,hbase_cluster) (rate(master_authentication_successes{job=\"integrations/apache-hbase\",job=~\"$job\",hbase_cluster=~\"$hbase_cluster\",instance=~\"$instance\"}[$__rate_interval]))", "format": "time_series", "instant": false, "legendFormat": "{{hbase_cluster}} - masters success", @@ -622,7 +622,7 @@ "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "sum by(job,hbase_cluster) (master_authentication_failures{job=\"integrations/apache-hbase\",job=~\"$job\",hbase_cluster=~\"$hbase_cluster\",instance=~\"$instance\"})", + "expr": "sum by(job,hbase_cluster) (rate(master_authentication_failures{job=\"integrations/apache-hbase\",job=~\"$job\",hbase_cluster=~\"$hbase_cluster\",instance=~\"$instance\"}[$__rate_interval]))", "format": "time_series", "instant": false, "legendFormat": "{{hbase_cluster}} - masters failure", @@ -633,7 +633,7 @@ "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "sum by(job,hbase_cluster) (region_server_authentication_successes{job=\"integrations/apache-hbase\",job=~\"$job\",hbase_cluster=~\"$hbase_cluster\",instance=~\"$instance\"})", + "expr": "sum by(job,hbase_cluster) (rate(region_server_authentication_successes{job=\"integrations/apache-hbase\",job=~\"$job\",hbase_cluster=~\"$hbase_cluster\",instance=~\"$instance\"}[$__rate_interval]))", "format": "time_series", "instant": false, "legendFormat": "{{hbase_cluster}} - rs success", @@ -644,7 +644,7 @@ "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "sum by(job,hbase_cluster) (region_server_authentication_failures{job=\"integrations/apache-hbase\",job=~\"$job\",hbase_cluster=~\"$hbase_cluster\",instance=~\"$instance\"})", + "expr": "sum by(job,hbase_cluster) 
(rate(region_server_authentication_failures{job=\"integrations/apache-hbase\",job=~\"$job\",hbase_cluster=~\"$hbase_cluster\",instance=~\"$instance\"}[$__rate_interval]))", "format": "time_series", "instant": false, "legendFormat": "{{hbase_cluster}} - rs failure", diff --git a/apache-hbase-mixin/dashboards_out/apache-hbase-logs-overview.json b/apache-hbase-mixin/dashboards_out/apache-hbase-logs.json similarity index 99% rename from apache-hbase-mixin/dashboards_out/apache-hbase-logs-overview.json rename to apache-hbase-mixin/dashboards_out/apache-hbase-logs.json index 121214aee..d9c781026 100644 --- a/apache-hbase-mixin/dashboards_out/apache-hbase-logs-overview.json +++ b/apache-hbase-mixin/dashboards_out/apache-hbase-logs.json @@ -330,5 +330,5 @@ }, "timezone": "default", "title": "Apache HBase logs", - "uid": "apachehbase_logs" + "uid": "apachehbase-logs" } \ No newline at end of file diff --git a/apache-hbase-mixin/dashboards_out/apache-hbase-regionserver-overview.json b/apache-hbase-mixin/dashboards_out/apache-hbase-regionserver-overview.json index 5116eeacc..da1d9a4bc 100644 --- a/apache-hbase-mixin/dashboards_out/apache-hbase-regionserver-overview.json +++ b/apache-hbase-mixin/dashboards_out/apache-hbase-regionserver-overview.json @@ -13,7 +13,7 @@ "keepTime": true, "title": "Apache HBase logs", "type": "link", - "url": "/d/apachehbase_logs" + "url": "/d/apachehbase-logs" }, { "asDropdown": true, @@ -78,11 +78,11 @@ "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "server_region_count{job=\"integrations/apache-hbase\",job=~\"$job\",hbase_cluster=~\"$hbase_cluster\",instance=~\"$instance\"}", + "expr": "sum by(job,hbase_cluster) (server_region_count{job=\"integrations/apache-hbase\",job=~\"$job\",hbase_cluster=~\"$hbase_cluster\",instance=~\"$instance\"})", "format": "time_series", "instant": false, "legendFormat": "{{instance}}", - "refId": "Region count" + "refId": "Region count aggregated" } ], "title": "Regions", @@ -126,11 +126,11 @@ 
"type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "server_store_file_count{job=\"integrations/apache-hbase\",job=~\"$job\",hbase_cluster=~\"$hbase_cluster\",instance=~\"$instance\"}", + "expr": "sum by(job,hbase_cluster) (server_store_file_count{job=\"integrations/apache-hbase\",job=~\"$job\",hbase_cluster=~\"$hbase_cluster\",instance=~\"$instance\"})", "format": "time_series", "instant": false, "legendFormat": "{{instance}}", - "refId": "Store file count" + "refId": "Store file count aggregated" } ], "title": "Store files", @@ -174,11 +174,11 @@ "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "server_store_file_size{job=\"integrations/apache-hbase\",job=~\"$job\",hbase_cluster=~\"$hbase_cluster\",instance=~\"$instance\"}", + "expr": "sum by(job,hbase_cluster) (server_store_file_size{job=\"integrations/apache-hbase\",job=~\"$job\",hbase_cluster=~\"$hbase_cluster\",instance=~\"$instance\"})", "format": "time_series", "instant": false, "legendFormat": "{{instance}}", - "refId": "Store file size" + "refId": "Store file size aggregated" } ], "title": "Store file size", @@ -222,11 +222,11 @@ "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "region_server_num_open_connections{job=\"integrations/apache-hbase\",job=~\"$job\",hbase_cluster=~\"$hbase_cluster\",instance=~\"$instance\"}", + "expr": "sum by(job,hbase_cluster) (region_server_num_open_connections{job=\"integrations/apache-hbase\",job=~\"$job\",hbase_cluster=~\"$hbase_cluster\",instance=~\"$instance\"})", "format": "time_series", "instant": false, "legendFormat": "{{instance}}", - "refId": "RPC connections" + "refId": "RPC connections aggregated" } ], "title": "RPC connections", @@ -388,7 +388,7 @@ "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "sum by(job,hbase_cluster) (server_read_request_count{job=\"integrations/apache-hbase\",job=~\"$job\",hbase_cluster=~\"$hbase_cluster\",instance=~\"$instance\"})", + "expr": "sum 
by(job,hbase_cluster) (rate(server_read_request_count{job=\"integrations/apache-hbase\",job=~\"$job\",hbase_cluster=~\"$hbase_cluster\",instance=~\"$instance\"}[$__rate_interval]))", "format": "time_series", "instant": false, "legendFormat": "read", @@ -399,7 +399,7 @@ "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "sum by(job,hbase_cluster) (server_write_request_count{job=\"integrations/apache-hbase\",job=~\"$job\",hbase_cluster=~\"$hbase_cluster\",instance=~\"$instance\"})", + "expr": "sum by(job,hbase_cluster) (rate(server_write_request_count{job=\"integrations/apache-hbase\",job=~\"$job\",hbase_cluster=~\"$hbase_cluster\",instance=~\"$instance\"}[$__rate_interval]))", "format": "time_series", "instant": false, "legendFormat": "write", @@ -410,7 +410,7 @@ "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "sum by(job,hbase_cluster) (server_cp_request_count{job=\"integrations/apache-hbase\",job=~\"$job\",hbase_cluster=~\"$hbase_cluster\",instance=~\"$instance\"})", + "expr": "sum by(job,hbase_cluster) (rate(server_cp_request_count{job=\"integrations/apache-hbase\",job=~\"$job\",hbase_cluster=~\"$hbase_cluster\",instance=~\"$instance\"}[$__rate_interval]))", "format": "time_series", "instant": false, "legendFormat": "copy", @@ -421,7 +421,7 @@ "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "sum by(job,hbase_cluster) (server_filtered_read_request_count{job=\"integrations/apache-hbase\",job=~\"$job\",hbase_cluster=~\"$hbase_cluster\",instance=~\"$instance\"})", + "expr": "sum by(job,hbase_cluster) (rate(server_filtered_read_request_count{job=\"integrations/apache-hbase\",job=~\"$job\",hbase_cluster=~\"$hbase_cluster\",instance=~\"$instance\"}[$__rate_interval]))", "format": "time_series", "instant": false, "legendFormat": "filtered read", @@ -432,7 +432,7 @@ "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "sum by(job,hbase_cluster) 
(server_rpc_get_request_count{job=\"integrations/apache-hbase\",job=~\"$job\",hbase_cluster=~\"$hbase_cluster\",instance=~\"$instance\"})", + "expr": "sum by(job,hbase_cluster) (rate(server_rpc_get_request_count{job=\"integrations/apache-hbase\",job=~\"$job\",hbase_cluster=~\"$hbase_cluster\",instance=~\"$instance\"}[$__rate_interval]))", "format": "time_series", "instant": false, "legendFormat": "rpc get", @@ -443,7 +443,7 @@ "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "sum by(job,hbase_cluster) (server_rpc_scan_request_count{job=\"integrations/apache-hbase\",job=~\"$job\",hbase_cluster=~\"$hbase_cluster\",instance=~\"$instance\"})", + "expr": "sum by(job,hbase_cluster) (rate(server_rpc_scan_request_count{job=\"integrations/apache-hbase\",job=~\"$job\",hbase_cluster=~\"$hbase_cluster\",instance=~\"$instance\"}[$__rate_interval]))", "format": "time_series", "instant": false, "legendFormat": "rpc scan", @@ -454,7 +454,7 @@ "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "sum by(job,hbase_cluster) (server_rpc_full_scan_request_count{job=\"integrations/apache-hbase\",job=~\"$job\",hbase_cluster=~\"$hbase_cluster\",instance=~\"$instance\"})", + "expr": "sum by(job,hbase_cluster) (rate(server_rpc_full_scan_request_count{job=\"integrations/apache-hbase\",job=~\"$job\",hbase_cluster=~\"$hbase_cluster\",instance=~\"$instance\"}[$__rate_interval]))", "format": "time_series", "instant": false, "legendFormat": "rpc full scan", @@ -465,7 +465,7 @@ "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "sum by(job,hbase_cluster) (server_rpc_mutate_request_count{job=\"integrations/apache-hbase\",job=~\"$job\",hbase_cluster=~\"$hbase_cluster\",instance=~\"$instance\"})", + "expr": "sum by(job,hbase_cluster) (rate(server_rpc_mutate_request_count{job=\"integrations/apache-hbase\",job=~\"$job\",hbase_cluster=~\"$hbase_cluster\",instance=~\"$instance\"}[$__rate_interval]))", "format": "time_series", "instant": false, 
"legendFormat": "rpc mutate", @@ -476,7 +476,7 @@ "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "sum by(job,hbase_cluster) (server_rpc_multi_request_count{job=\"integrations/apache-hbase\",job=~\"$job\",hbase_cluster=~\"$hbase_cluster\",instance=~\"$instance\"})", + "expr": "sum by(job,hbase_cluster) (rate(server_rpc_multi_request_count{job=\"integrations/apache-hbase\",job=~\"$job\",hbase_cluster=~\"$hbase_cluster\",instance=~\"$instance\"}[$__rate_interval]))", "format": "time_series", "instant": false, "legendFormat": "rpc multi", @@ -1020,7 +1020,7 @@ "expr": "sum by(job,hbase_cluster) (rate(region_server_authentication_successes{job=\"integrations/apache-hbase\",job=~\"$job\",hbase_cluster=~\"$hbase_cluster\",instance=~\"$instance\"}[$__rate_interval]))", "format": "time_series", "instant": false, - "legendFormat": "success", + "legendFormat": "{{hbase_cluster}} - rs success", "refId": "RegionServer authentication successes" }, { @@ -1031,7 +1031,7 @@ "expr": "sum by(job,hbase_cluster) (rate(region_server_authentication_failures{job=\"integrations/apache-hbase\",job=~\"$job\",hbase_cluster=~\"$hbase_cluster\",instance=~\"$instance\"}[$__rate_interval]))", "format": "time_series", "instant": false, - "legendFormat": "failure", + "legendFormat": "{{hbase_cluster}} - rs failure", "refId": "RegionServer authentication failures" } ], diff --git a/apache-hbase-mixin/mixin.libsonnet b/apache-hbase-mixin/mixin.libsonnet index 01fb679ea..987c73894 100644 --- a/apache-hbase-mixin/mixin.libsonnet +++ b/apache-hbase-mixin/mixin.libsonnet @@ -11,7 +11,7 @@ local hbase = hbaselib.new() }, ); -local optional_lablels = { +local optional_labels = { cluster+: { allValue: '.*', }, @@ -24,7 +24,7 @@ local optional_lablels = { grafanaDashboards+:: { [fname]: local dashboard = hbase.grafana.dashboards[fname]; - dashboard + util.patch_variables(dashboard, optional_lablels) + dashboard + util.patch_variables(dashboard, optional_labels) for fname in 
std.objectFields(hbase.grafana.dashboards) }, prometheusAlerts+:: hbase.prometheus.alerts, diff --git a/apache-hbase-mixin/panels.libsonnet b/apache-hbase-mixin/panels.libsonnet index 9b44d47ab..f2f8a8499 100644 --- a/apache-hbase-mixin/panels.libsonnet +++ b/apache-hbase-mixin/panels.libsonnet @@ -151,7 +151,7 @@ local commonlib = import 'common-lib/common/main.libsonnet'; g.panel.table.standardOptions.withLinks([ { title: '', - url: '/d/apachehbase-regionserver-overview?from=${__from}&to=${__to}&var-instance=${__data.fields["RegionServer"]}', + url: '/d/'+ this.grafana.dashboards['apache-hbase-regionserver-overview.json'].uid + '?from=${__from}&to=${__to}&var-instance=${__data.fields["RegionServer"]}', }, ]) ), @@ -290,7 +290,7 @@ local commonlib = import 'common-lib/common/main.libsonnet'; regionsPanel: commonlib.panels.generic.stat.info.new( 'Regions', - targets=[signals.regionserver.regionCount.asTarget()], + targets=[signals.regionserver.regionCountAggregated.asTarget()], description='The number of regions hosted by the RegionServer.' ) + g.panel.stat.standardOptions.withUnit('short'), @@ -298,7 +298,7 @@ local commonlib = import 'common-lib/common/main.libsonnet'; storeFilesPanel: commonlib.panels.generic.stat.info.new( 'Store files', - targets=[signals.regionserver.storeFileCount.asTarget()], + targets=[signals.regionserver.storeFileCountAggregated.asTarget()], description='The number of store files on disk currently managed by the RegionServer.' ) + g.panel.stat.standardOptions.withUnit('short'), @@ -306,7 +306,7 @@ local commonlib = import 'common-lib/common/main.libsonnet'; storeFileSizePanel: commonlib.panels.generic.stat.info.new( 'Store file size', - targets=[signals.regionserver.storeFileSize.asTarget()], + targets=[signals.regionserver.storeFileSizeAggregated.asTarget()], description='The total size of the store files on disk managed by the RegionServer.' 
) + g.panel.stat.standardOptions.withUnit('decbytes'), @@ -315,7 +315,7 @@ local commonlib = import 'common-lib/common/main.libsonnet'; rpcConnectionsPanel: commonlib.panels.generic.stat.info.new( 'RPC connections', - targets=[signals.regionserver.rpcConnections.asTarget()], + targets=[signals.regionserver.rpcConnectionsAggregated.asTarget()], description='The number of open connections to the RegionServer.' ) + g.panel.stat.standardOptions.withUnit('short'), @@ -453,8 +453,8 @@ local commonlib = import 'common-lib/common/main.libsonnet'; commonlib.panels.generic.timeSeries.base.new( 'Authentications', targets=[ - signals.regionserver.regionServerAuthenticationSuccess.asTarget(), - signals.regionserver.regionServerAuthenticationFailure.asTarget(), + signals.cluster.regionServerAuthenticationSuccess.asTarget(), + signals.cluster.regionServerAuthenticationFailure.asTarget(), ], description='The rate of successful and unsuccessful authentications.' ) diff --git a/apache-hbase-mixin/prometheus_rules_out/prometheus_alerts.yaml b/apache-hbase-mixin/prometheus_rules_out/prometheus_alerts.yaml index 24e4784f3..16b0c55ae 100644 --- a/apache-hbase-mixin/prometheus_rules_out/prometheus_alerts.yaml +++ b/apache-hbase-mixin/prometheus_rules_out/prometheus_alerts.yaml @@ -24,7 +24,7 @@ groups: description: '{{printf "%.0f" $value}} percent of RegionServers in transition in cluster {{$labels.hbase_cluster}} are transitioning for longer than expected, which is above the threshold of 50 percent' summary: RegionServers are in transition for longer than expected. 
expr: | - 100 * assignment_manager_rit_count_over_threshold / clamp_min(assignment_manager_rit_count, 1) > 50 + 100 * assignment_manager_rit_count_over_threshold / clamp_min(assignment_manager_rit_count{job="integrations/apache-hbase"}, 1) > 50 for: 5m labels: severity: warning @@ -33,7 +33,7 @@ groups: description: '{{printf "%.0f" $value}} percent of authentication attempts to the master are failing in cluster {{$labels.hbase_cluster}}, which is above the threshold of 35 percent' summary: A high percentage of authentication attempts to the master are failing. expr: | - 100 * rate(master_authentication_failures[5m]) / (clamp_min(rate(master_authentication_successes[5m]), 1) + clamp_min(rate(master_authentication_failures[5m]), 1)) > 35 + 100 * rate(master_authentication_failures[5m]) / (clamp_min(rate(master_authentication_successes{job="integrations/apache-hbase"}[5m]), 1) + clamp_min(rate(master_authentication_failures{job="integrations/apache-hbase"}[5m]), 1)) > 35 for: 5m labels: severity: warning @@ -42,7 +42,7 @@ groups: description: '{{printf "%.0f" $value}} percent of authentication attempts to the RegionServer {{$labels.instance}} are failing in cluster {{$labels.hbase_cluster}}, which is above the threshold of 35 percent' summary: A high percentage of authentication attempts to a RegionServer are failing. 
expr: | - 100 * rate(region_server_authentication_failures[5m]) / (clamp_min(rate(region_server_authentication_successes[5m]), 1) + clamp_min(rate(region_server_authentication_failures[5m]), 1)) > 35 + 100 * rate(region_server_authentication_failures[5m]) / (clamp_min(rate(region_server_authentication_successes{job="integrations/apache-hbase"}[5m]), 1) + clamp_min(rate(region_server_authentication_failures{job="integrations/apache-hbase"}[5m]), 1)) > 35 for: 5m labels: severity: warning diff --git a/apache-hbase-mixin/signals/cluster.libsonnet b/apache-hbase-mixin/signals/cluster.libsonnet index d6ba8ae0e..72e3e668e 100644 --- a/apache-hbase-mixin/signals/cluster.libsonnet +++ b/apache-hbase-mixin/signals/cluster.libsonnet @@ -100,7 +100,7 @@ function(this) unit: 'reqps', sources: { prometheus: { - expr: 'sum by(' + groupAggList + ') (master_authentication_successes{%(queriesSelector)s})', + expr: 'sum by(' + groupAggList + ') (rate(master_authentication_successes{%(queriesSelector)s}[$__rate_interval]))', legendCustomTemplate: legendCustomTemplate + ' - masters success', }, }, @@ -113,7 +113,7 @@ function(this) unit: 'reqps', sources: { prometheus: { - expr: 'sum by(' + groupAggList + ') (master_authentication_failures{%(queriesSelector)s})', + expr: 'sum by(' + groupAggList + ') (rate(master_authentication_failures{%(queriesSelector)s}[$__rate_interval]))', legendCustomTemplate: legendCustomTemplate + ' - masters failure', }, }, @@ -126,7 +126,7 @@ function(this) unit: 'reqps', sources: { prometheus: { - expr: 'sum by(' + groupAggList + ') (region_server_authentication_successes{%(queriesSelector)s})', + expr: 'sum by(' + groupAggList + ') (rate(region_server_authentication_successes{%(queriesSelector)s}[$__rate_interval]))', legendCustomTemplate: legendCustomTemplate + ' - rs success', }, }, @@ -139,7 +139,7 @@ function(this) unit: 'reqps', sources: { prometheus: { - expr: 'sum by(' + groupAggList + ') 
(region_server_authentication_failures{%(queriesSelector)s})', + expr: 'sum by(' + groupAggList + ') (rate(region_server_authentication_failures{%(queriesSelector)s}[$__rate_interval]))', legendCustomTemplate: legendCustomTemplate + ' - rs failure', }, }, diff --git a/apache-hbase-mixin/signals/regionserver.libsonnet b/apache-hbase-mixin/signals/regionserver.libsonnet index b8141f73a..a1274b767 100644 --- a/apache-hbase-mixin/signals/regionserver.libsonnet +++ b/apache-hbase-mixin/signals/regionserver.libsonnet @@ -25,6 +25,20 @@ function(this) sources: { prometheus: { expr: 'server_region_count{%(queriesSelector)s}', + legendCustomTemplate: '{{instance}}', + }, + }, + }, + + regionCountAggregated: { + name: 'Region count aggregated', + type: 'gauge', + description: 'The total number of regions across all RegionServers.', + unit: 'short', + sources: { + prometheus: { + expr: 'sum by(' + groupAggList + ') (server_region_count{%(queriesSelector)s})', + legendCustomTemplate: '{{instance}}', }, }, }, @@ -37,6 +51,20 @@ function(this) sources: { prometheus: { expr: 'server_store_file_count{%(queriesSelector)s}', + legendCustomTemplate: '{{instance}}', + }, + }, + }, + + storeFileCountAggregated: { + name: 'Store file count aggregated', + type: 'gauge', + description: 'The total number of store files across all RegionServers.', + unit: 'short', + sources: { + prometheus: { + expr: 'sum by(' + groupAggList + ') (server_store_file_count{%(queriesSelector)s})', + legendCustomTemplate: '{{instance}}', }, }, }, @@ -49,6 +77,20 @@ function(this) sources: { prometheus: { expr: 'server_store_file_size{%(queriesSelector)s}', + legendCustomTemplate: '{{instance}}', + }, + }, + }, + + storeFileSizeAggregated: { + name: 'Store file size aggregated', + type: 'gauge', + description: 'The total size of store files across all RegionServers.', + unit: 'decbytes', + sources: { + prometheus: { + expr: 'sum by(' + groupAggList + ') (server_store_file_size{%(queriesSelector)s})', + 
legendCustomTemplate: '{{instance}}', }, }, }, @@ -62,6 +104,20 @@ function(this) sources: { prometheus: { expr: 'region_server_num_open_connections{%(queriesSelector)s}', + legendCustomTemplate: '{{instance}}', + }, + }, + }, + + rpcConnectionsAggregated: { + name: 'RPC connections aggregated', + type: 'gauge', + description: 'The total number of open connections across all RegionServers.', + unit: 'short', + sources: { + prometheus: { + expr: 'sum by(' + groupAggList + ') (region_server_num_open_connections{%(queriesSelector)s})', + legendCustomTemplate: '{{instance}}', }, }, }, @@ -69,12 +125,13 @@ function(this) // Request metrics totalRequestRate: { name: 'Total requests', - type: 'counter', + type: 'raw', description: 'The rate of requests received by the RegionServer.', unit: 'reqps', sources: { prometheus: { - expr: 'server_total_request_count{%(queriesSelector)s}', + expr: 'rate(server_total_request_count{%(queriesSelector)s}[$__rate_interval])', + legendCustomTemplate: '{{instance}}', }, }, }, @@ -86,7 +143,7 @@ function(this) unit: 'reqps', sources: { prometheus: { - expr: 'sum by(' + groupAggList + ') (server_read_request_count{%(queriesSelector)s})', + expr: 'sum by(' + groupAggList + ') (rate(server_read_request_count{%(queriesSelector)s}[$__rate_interval]))', legendCustomTemplate: 'read', }, }, @@ -99,7 +156,7 @@ function(this) unit: 'reqps', sources: { prometheus: { - expr: 'sum by(' + groupAggList + ') (server_write_request_count{%(queriesSelector)s})', + expr: 'sum by(' + groupAggList + ') (rate(server_write_request_count{%(queriesSelector)s}[$__rate_interval]))', legendCustomTemplate: 'write', }, }, @@ -112,7 +169,7 @@ function(this) unit: 'reqps', sources: { prometheus: { - expr: 'sum by(' + groupAggList + ') (server_cp_request_count{%(queriesSelector)s})', + expr: 'sum by(' + groupAggList + ') (rate(server_cp_request_count{%(queriesSelector)s}[$__rate_interval]))', legendCustomTemplate: 'copy', }, }, @@ -125,7 +182,7 @@ function(this) unit: 
'reqps', sources: { prometheus: { - expr: 'sum by(' + groupAggList + ') (server_filtered_read_request_count{%(queriesSelector)s})', + expr: 'sum by(' + groupAggList + ') (rate(server_filtered_read_request_count{%(queriesSelector)s}[$__rate_interval]))', legendCustomTemplate: 'filtered read', }, }, @@ -138,7 +195,7 @@ function(this) unit: 'reqps', sources: { prometheus: { - expr: 'sum by(' + groupAggList + ') (server_rpc_get_request_count{%(queriesSelector)s})', + expr: 'sum by(' + groupAggList + ') (rate(server_rpc_get_request_count{%(queriesSelector)s}[$__rate_interval]))', legendCustomTemplate: 'rpc get', }, }, @@ -151,7 +208,7 @@ function(this) unit: 'reqps', sources: { prometheus: { - expr: 'sum by(' + groupAggList + ') (server_rpc_scan_request_count{%(queriesSelector)s})', + expr: 'sum by(' + groupAggList + ') (rate(server_rpc_scan_request_count{%(queriesSelector)s}[$__rate_interval]))', legendCustomTemplate: 'rpc scan', }, }, @@ -164,7 +221,7 @@ function(this) unit: 'reqps', sources: { prometheus: { - expr: 'sum by(' + groupAggList + ') (server_rpc_full_scan_request_count{%(queriesSelector)s})', + expr: 'sum by(' + groupAggList + ') (rate(server_rpc_full_scan_request_count{%(queriesSelector)s}[$__rate_interval]))', legendCustomTemplate: 'rpc full scan', }, }, @@ -177,7 +234,7 @@ function(this) unit: 'reqps', sources: { prometheus: { - expr: 'sum by(' + groupAggList + ') (server_rpc_mutate_request_count{%(queriesSelector)s})', + expr: 'sum by(' + groupAggList + ') (rate(server_rpc_mutate_request_count{%(queriesSelector)s}[$__rate_interval]))', legendCustomTemplate: 'rpc mutate', }, }, @@ -190,7 +247,7 @@ function(this) unit: 'reqps', sources: { prometheus: { - expr: 'sum by(' + groupAggList + ') (server_rpc_multi_request_count{%(queriesSelector)s})', + expr: 'sum by(' + groupAggList + ') (rate(server_rpc_multi_request_count{%(queriesSelector)s}[$__rate_interval]))', legendCustomTemplate: 'rpc multi', }, }, @@ -305,7 +362,7 @@ function(this) slowDeleteRate: { 
name: 'Slow deletes', type: 'raw', - description: 'The slow delete operations.', + description: 'The rate of slow delete operations.', unit: 'ops', sources: { prometheus: { @@ -368,31 +425,5 @@ function(this) }, }, }, - - regionServerAuthenticationSuccess: { - name: 'RegionServer authentication successes', - type: 'raw', - description: 'The rate of successful authentications to the RegionServer.', - unit: 'reqps', - sources: { - prometheus: { - expr: 'sum by(' + groupAggList + ') (rate(region_server_authentication_successes{%(queriesSelector)s}[$__rate_interval]))', - legendCustomTemplate: 'success', - }, - }, - }, - - regionServerAuthenticationFailure: { - name: 'RegionServer authentication failures', - type: 'raw', - description: 'The rate of failed authentications to the RegionServer.', - unit: 'reqps', - sources: { - prometheus: { - expr: 'sum by(' + groupAggList + ') (rate(region_server_authentication_failures{%(queriesSelector)s}[$__rate_interval]))', - legendCustomTemplate: 'failure', - }, - }, - }, }, } From b96a6803fc50891a2e9ff5881318d50c9fe088b8 Mon Sep 17 00:00:00 2001 From: schmikei Date: Wed, 26 Nov 2025 16:08:57 -0500 Subject: [PATCH 4/4] make lint --- apache-hbase-mixin/panels.libsonnet | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/apache-hbase-mixin/panels.libsonnet b/apache-hbase-mixin/panels.libsonnet index f2f8a8499..4e293fdd4 100644 --- a/apache-hbase-mixin/panels.libsonnet +++ b/apache-hbase-mixin/panels.libsonnet @@ -151,7 +151,7 @@ local commonlib = import 'common-lib/common/main.libsonnet'; g.panel.table.standardOptions.withLinks([ { title: '', - url: '/d/'+ this.grafana.dashboards['apache-hbase-regionserver-overview.json'].uid + '?from=${__from}&to=${__to}&var-instance=${__data.fields["RegionServer"]}', + url: '/d/' + this.grafana.dashboards['apache-hbase-regionserver-overview.json'].uid + '?from=${__from}&to=${__to}&var-instance=${__data.fields["RegionServer"]}', }, ]) ),