From 1452311a9bb01baa85786345e119d719b5838307 Mon Sep 17 00:00:00 2001 From: Arthur Outhenin-Chalandre Date: Wed, 23 Mar 2022 12:50:20 +0100 Subject: [PATCH] ceph-mixin: rewrite promql queries to multiline Fixes: https://tracker.ceph.com/issues/55005 Signed-off-by: Arthur Outhenin-Chalandre --- .../ceph-mixin/dashboards/host.libsonnet | 276 ++++++++++++++++-- .../ceph-mixin/dashboards/osd.libsonnet | 172 +++++++++-- .../ceph-mixin/dashboards/pool.libsonnet | 93 +++++- .../ceph-mixin/dashboards/rbd.libsonnet | 54 +++- .../ceph-mixin/dashboards/rgw.libsonnet | 268 ++++++++++++++--- .../dashboards_out/host-details.json | 36 +-- .../dashboards_out/hosts-overview.json | 10 +- .../dashboards_out/osd-device-details.json | 18 +- .../dashboards_out/osds-overview.json | 18 +- .../dashboards_out/pool-detail.json | 16 +- .../dashboards_out/pool-overview.json | 12 +- .../dashboards_out/radosgw-detail.json | 24 +- .../dashboards_out/radosgw-overview.json | 44 +-- .../dashboards_out/rbd-details.json | 4 +- .../dashboards_out/rbd-overview.json | 10 +- 15 files changed, 861 insertions(+), 194 deletions(-) diff --git a/monitoring/ceph-mixin/dashboards/host.libsonnet b/monitoring/ceph-mixin/dashboards/host.libsonnet index 06cb66d0a0264..7006e890ab770 100644 --- a/monitoring/ceph-mixin/dashboards/host.libsonnet +++ b/monitoring/ceph-mixin/dashboards/host.libsonnet @@ -137,7 +137,14 @@ local u = import 'utils.libsonnet'; 'AVG CPU Busy', 'Average CPU busy across all hosts (OSD, RGW, MON etc) within the cluster', 'current', - 'avg(\n 1 - (\n avg by(instance) \n (irate(node_cpu_seconds_total{mode=\'idle\',instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"}[1m]) or\n irate(node_cpu{mode=\'idle\',instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"}[1m]))\n )\n )', + ||| + avg(1 - ( + avg by(instance) ( + irate(node_cpu_seconds_total{mode=\'idle\',instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"}[1m]) or + 
irate(node_cpu{mode=\'idle\',instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"}[1m]) + ) + )) + |||, true, 4, 0, @@ -149,7 +156,31 @@ local u = import 'utils.libsonnet'; 'AVG RAM Utilization', 'Average Memory Usage across all hosts in the cluster (excludes buffer/cache usage)', 'current', - 'avg (((node_memory_MemTotal{instance=~"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*"} or node_memory_MemTotal_bytes{instance=~"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*"})- (\n (node_memory_MemFree{instance=~"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*"} or node_memory_MemFree_bytes{instance=~"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*"}) + \n (node_memory_Cached{instance=~"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*"} or node_memory_Cached_bytes{instance=~"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*"}) + \n (node_memory_Buffers{instance=~"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*"} or node_memory_Buffers_bytes{instance=~"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*"}) +\n (node_memory_Slab{instance=~"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*"} or node_memory_Slab_bytes{instance=~"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*"})\n )) /\n (node_memory_MemTotal{instance=~"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*"} or node_memory_MemTotal_bytes{instance=~"($osd_hosts|$rgw_hosts|$mon_hosts|$mds_hosts).*"} ))', + ||| + avg (( + ( + node_memory_MemTotal{instance=~"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*"} or + node_memory_MemTotal_bytes{instance=~"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*"} + ) - (( + node_memory_MemFree{instance=~"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*"} or + node_memory_MemFree_bytes{instance=~"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*"}) + + ( + node_memory_Cached{instance=~"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*"} or + node_memory_Cached_bytes{instance=~"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*"} + ) + ( + 
node_memory_Buffers{instance=~"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*"} or + node_memory_Buffers_bytes{instance=~"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*"} + ) + ( + node_memory_Slab{instance=~"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*"} or + node_memory_Slab_bytes{instance=~"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*"} + ) + ) + ) + ( + node_memory_MemTotal{instance=~"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*"} or + node_memory_MemTotal_bytes{instance=~"($osd_hosts|$rgw_hosts|$mon_hosts|$mds_hosts).*"} + )) + |||, true, 8, 0, @@ -161,7 +192,15 @@ local u = import 'utils.libsonnet'; 'Physical IOPS', 'IOPS Load at the device as reported by the OS on all OSD hosts', 'current', - 'sum ((irate(node_disk_reads_completed{instance=~"($osd_hosts).*"}[5m]) or irate(node_disk_reads_completed_total{instance=~"($osd_hosts).*"}[5m]) ) + \n(irate(node_disk_writes_completed{instance=~"($osd_hosts).*"}[5m]) or irate(node_disk_writes_completed_total{instance=~"($osd_hosts).*"}[5m])))', + ||| + sum (( + irate(node_disk_reads_completed{instance=~"($osd_hosts).*"}[5m]) or + irate(node_disk_reads_completed_total{instance=~"($osd_hosts).*"}[5m]) + ) + ( + irate(node_disk_writes_completed{instance=~"($osd_hosts).*"}[5m]) or + irate(node_disk_writes_completed_total{instance=~"($osd_hosts).*"}[5m]) + )) + |||, true, 12, 0, @@ -173,7 +212,20 @@ local u = import 'utils.libsonnet'; 'AVG Disk Utilization', 'Average Disk utilization for all OSD data devices (i.e. 
excludes journal/WAL)', 'current', - 'avg (\n label_replace((irate(node_disk_io_time_ms[5m]) / 10 ) or\n (irate(node_disk_io_time_seconds_total[5m]) * 100), "instance", "$1", "instance", "([^.:]*).*"\n ) *\n on(instance, device) group_left(ceph_daemon) label_replace(label_replace(ceph_disk_occupation_human{instance=~"($osd_hosts).*"}, "device", "$1", "device", "/dev/(.*)"), "instance", "$1", "instance", "([^.:]*).*")\n)', + ||| + avg ( + label_replace( + (irate(node_disk_io_time_ms[5m]) / 10 ) or + (irate(node_disk_io_time_seconds_total[5m]) * 100), + "instance", "$1", "instance", "([^.:]*).*" + ) * on(instance, device) group_left(ceph_daemon) label_replace( + label_replace( + ceph_disk_occupation_human{instance=~"($osd_hosts).*"}, + "device", "$1", "device", "/dev/(.*)" + ), "instance", "$1", "instance", "([^.:]*).*" + ) + ) + |||, true, 16, 0, @@ -212,7 +264,18 @@ local u = import 'utils.libsonnet'; 'CPU Busy - Top 10 Hosts', 'Show the top 10 busiest hosts by cpu', 'percent', - 'topk(10,100 * ( 1 - (\n avg by(instance) \n (irate(node_cpu_seconds_total{mode=\'idle\',instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"}[1m]) or\n irate(node_cpu{mode=\'idle\',instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"}[1m]))\n )\n )\n)', + ||| + topk(10, + 100 * ( + 1 - ( + avg by(instance) ( + irate(node_cpu_seconds_total{mode=\'idle\',instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"}[1m]) or + irate(node_cpu{mode=\'idle\',instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"}[1m]) + ) + ) + ) + ) + |||, '{{instance}}', 0, 5, @@ -341,7 +404,28 @@ local u = import 'utils.libsonnet'; system: '#890F02', user: '#3F6833', wait: '#C15C17', - }, 'CPU Utilization', "Shows the CPU breakdown. 
When multiple servers are selected, only the first host's cpu data is shown", 'null', 'percent', '% Utilization', 'sum by (mode) (\n irate(node_cpu{instance=~"($ceph_hosts)([\\\\.:].*)?", mode=~"(irq|nice|softirq|steal|system|user|iowait)"}[1m]) or\n irate(node_cpu_seconds_total{instance=~"($ceph_hosts)([\\\\.:].*)?", mode=~"(irq|nice|softirq|steal|system|user|iowait)"}[1m])\n) / scalar(\n sum(irate(node_cpu{instance=~"($ceph_hosts)([\\\\.:].*)?"}[1m]) or\n irate(node_cpu_seconds_total{instance=~"($ceph_hosts)([\\\\.:].*)?"}[1m]))\n) * 100', '{{mode}}', 3, 1, 6, 10 + }, + 'CPU Utilization', + "Shows the CPU breakdown. When multiple servers are selected, only the first host's cpu data is shown", + 'null', + 'percent', + '% Utilization', + ||| + sum by (mode) ( + irate(node_cpu{instance=~"($ceph_hosts)([\\\\.:].*)?", mode=~"(irq|nice|softirq|steal|system|user|iowait)"}[1m]) or + irate(node_cpu_seconds_total{instance=~"($ceph_hosts)([\\\\.:].*)?", mode=~"(irq|nice|softirq|steal|system|user|iowait)"}[1m]) + ) / ( + scalar( + sum(irate(node_cpu{instance=~"($ceph_hosts)([\\\\.:].*)?"}[1m]) or + irate(node_cpu_seconds_total{instance=~"($ceph_hosts)([\\\\.:].*)?"}[1m])) + ) * 100 + ) + |||, + '{{mode}}', + 3, + 1, + 6, + 10 ), HostDetailsGraphPanel( { @@ -357,7 +441,10 @@ local u = import 'utils.libsonnet'; 'null', 'bytes', 'RAM used', - 'node_memory_MemFree{instance=~"$ceph_hosts([\\\\.:].*)?"} or node_memory_MemFree_bytes{instance=~"$ceph_hosts([\\\\.:].*)?"} ', + ||| + node_memory_MemFree{instance=~"$ceph_hosts([\\\\.:].*)?"} or + node_memory_MemFree_bytes{instance=~"$ceph_hosts([\\\\.:].*)?"} + |||, 'Free', 9, 1, @@ -366,9 +453,52 @@ local u = import 'utils.libsonnet'; ) .addTargets( [ - u.addTargetSchema('node_memory_MemTotal{instance=~"$ceph_hosts([\\\\.:].*)?"} or node_memory_MemTotal_bytes{instance=~"$ceph_hosts([\\\\.:].*)?"} ', 'total'), - u.addTargetSchema('(node_memory_Cached{instance=~"$ceph_hosts([\\\\.:].*)?"} or 
node_memory_Cached_bytes{instance=~"$ceph_hosts([\\\\.:].*)?"}) + \n(node_memory_Buffers{instance=~"$ceph_hosts([\\\\.:].*)?"} or node_memory_Buffers_bytes{instance=~"$ceph_hosts([\\\\.:].*)?"}) +\n(node_memory_Slab{instance=~"$ceph_hosts([\\\\.:].*)?"} or node_memory_Slab_bytes{instance=~"$ceph_hosts([\\\\.:].*)?"}) \n', 'buffers/cache'), - u.addTargetSchema('(node_memory_MemTotal{instance=~"$ceph_hosts([\\\\.:].*)?"} or node_memory_MemTotal_bytes{instance=~"$ceph_hosts([\\\\.:].*)?"})- (\n (node_memory_MemFree{instance=~"$ceph_hosts([\\\\.:].*)?"} or node_memory_MemFree_bytes{instance=~"$ceph_hosts([\\\\.:].*)?"}) + \n (node_memory_Cached{instance=~"$ceph_hosts([\\\\.:].*)?"} or node_memory_Cached_bytes{instance=~"$ceph_hosts([\\\\.:].*)?"}) + \n (node_memory_Buffers{instance=~"$ceph_hosts([\\\\.:].*)?"} or node_memory_Buffers_bytes{instance=~"$ceph_hosts([\\\\.:].*)?"}) +\n (node_memory_Slab{instance=~"$ceph_hosts([\\\\.:].*)?"} or node_memory_Slab_bytes{instance=~"$ceph_hosts([\\\\.:].*)?"})\n )\n \n', 'used'), + u.addTargetSchema( + ||| + node_memory_MemTotal{instance=~"$ceph_hosts([\\\\.:].*)?"} or + node_memory_MemTotal_bytes{instance=~"$ceph_hosts([\\\\.:].*)?"} + |||, + 'total' + ), + u.addTargetSchema( + ||| + ( + node_memory_Cached{instance=~"$ceph_hosts([\\\\.:].*)?"} or + node_memory_Cached_bytes{instance=~"$ceph_hosts([\\\\.:].*)?"} + ) + ( + node_memory_Buffers{instance=~"$ceph_hosts([\\\\.:].*)?"} or + node_memory_Buffers_bytes{instance=~"$ceph_hosts([\\\\.:].*)?"} + ) + ( + node_memory_Slab{instance=~"$ceph_hosts([\\\\.:].*)?"} or + node_memory_Slab_bytes{instance=~"$ceph_hosts([\\\\.:].*)?"} + ) + |||, + 'buffers/cache' + ), + u.addTargetSchema( + ||| + ( + node_memory_MemTotal{instance=~"$ceph_hosts([\\\\.:].*)?"} or + node_memory_MemTotal_bytes{instance=~"$ceph_hosts([\\\\.:].*)?"} + ) - ( + ( + node_memory_MemFree{instance=~"$ceph_hosts([\\\\.:].*)?"} or + node_memory_MemFree_bytes{instance=~"$ceph_hosts([\\\\.:].*)?"} + ) + ( + 
node_memory_Cached{instance=~"$ceph_hosts([\\\\.:].*)?"} or + node_memory_Cached_bytes{instance=~"$ceph_hosts([\\\\.:].*)?"} + ) + ( + node_memory_Buffers{instance=~"$ceph_hosts([\\\\.:].*)?"} or + node_memory_Buffers_bytes{instance=~"$ceph_hosts([\\\\.:].*)?"} + ) + + ( + node_memory_Slab{instance=~"$ceph_hosts([\\\\.:].*)?"} or + node_memory_Slab_bytes{instance=~"$ceph_hosts([\\\\.:].*)?"} + ) + ) + |||, + 'used' + ), ] ) .addSeriesOverride( @@ -387,7 +517,14 @@ local u = import 'utils.libsonnet'; 'null', 'decbytes', 'Send (-) / Receive (+)', - 'sum by (device) (\n irate(node_network_receive_bytes{instance=~"($ceph_hosts)([\\\\.:].*)?",device!="lo"}[1m]) or \n irate(node_network_receive_bytes_total{instance=~"($ceph_hosts)([\\\\.:].*)?",device!="lo"}[1m])\n)', + ||| + sum by (device) ( + irate( + node_network_receive_bytes{instance=~"($ceph_hosts)([\\\\.:].*)?",device!="lo"}[1m]) or + irate(node_network_receive_bytes_total{instance=~"($ceph_hosts)([\\\\.:].*)?",device!="lo"}[1m] + ) + ) + |||, '{{device}}.rx', 15, 1, @@ -396,7 +533,15 @@ local u = import 'utils.libsonnet'; ) .addTargets( [ - u.addTargetSchema('sum by (device) (\n irate(node_network_transmit_bytes{instance=~"($ceph_hosts)([\\\\.:].*)?",device!="lo"}[1m]) or\n irate(node_network_transmit_bytes_total{instance=~"($ceph_hosts)([\\\\.:].*)?",device!="lo"}[1m])\n)', '{{device}}.tx'), + u.addTargetSchema( + ||| + sum by (device) ( + irate(node_network_transmit_bytes{instance=~"($ceph_hosts)([\\\\.:].*)?",device!="lo"}[1m]) or + irate(node_network_transmit_bytes_total{instance=~"($ceph_hosts)([\\\\.:].*)?",device!="lo"}[1m]) + ) + |||, + '{{device}}.tx' + ), ] ) .addSeriesOverride( @@ -409,7 +554,10 @@ local u = import 'utils.libsonnet'; 'null', 'pps', 'Send (-) / Receive (+)', - 'irate(node_network_receive_drop{instance=~"$ceph_hosts([\\\\.:].*)?"}[1m]) or irate(node_network_receive_drop_total{instance=~"$ceph_hosts([\\\\.:].*)?"}[1m])', + ||| + 
irate(node_network_receive_drop{instance=~"$ceph_hosts([\\\\.:].*)?"}[1m]) or + irate(node_network_receive_drop_total{instance=~"$ceph_hosts([\\\\.:].*)?"}[1m]) + |||, '{{device}}.rx', 21, 1, @@ -419,7 +567,11 @@ local u = import 'utils.libsonnet'; .addTargets( [ u.addTargetSchema( - 'irate(node_network_transmit_drop{instance=~"$ceph_hosts([\\\\.:].*)?"}[1m]) or irate(node_network_transmit_drop_total{instance=~"$ceph_hosts([\\\\.:].*)?"}[1m])', '{{device}}.tx' + ||| + irate(node_network_transmit_drop{instance=~"$ceph_hosts([\\\\.:].*)?"}[1m]) or + irate(node_network_transmit_drop_total{instance=~"$ceph_hosts([\\\\.:].*)?"}[1m]) + |||, + '{{device}}.tx' ), ] ) @@ -434,7 +586,12 @@ local u = import 'utils.libsonnet'; 'Raw Capacity', 'Each OSD consists of a Journal/WAL partition and a data partition. The RAW Capacity shown is the sum of the data partitions across all OSDs on the selected OSD hosts.', 'current', - 'sum(ceph_osd_stat_bytes and on (ceph_daemon) ceph_disk_occupation{instance=~"($ceph_hosts)([\\\\.:].*)?"})', + ||| + sum( + ceph_osd_stat_bytes and + on (ceph_daemon) ceph_disk_occupation{instance=~"($ceph_hosts)([\\\\.:].*)?"} + ) + |||, 0, 6, 3, @@ -447,7 +604,10 @@ local u = import 'utils.libsonnet'; 'null', 'pps', 'Send (-) / Receive (+)', - 'irate(node_network_receive_errs{instance=~"$ceph_hosts([\\\\.:].*)?"}[1m]) or irate(node_network_receive_errs_total{instance=~"$ceph_hosts([\\\\.:].*)?"}[1m])', + ||| + irate(node_network_receive_errs{instance=~"$ceph_hosts([\\\\.:].*)?"}[1m]) or + irate(node_network_receive_errs_total{instance=~"$ceph_hosts([\\\\.:].*)?"}[1m]) + |||, '{{device}}.rx', 21, 6, @@ -456,7 +616,11 @@ local u = import 'utils.libsonnet'; ) .addTargets( [u.addTargetSchema( - 'irate(node_network_transmit_errs{instance=~"$ceph_hosts([\\\\.:].*)?"}[1m]) or irate(node_network_transmit_errs_total{instance=~"$ceph_hosts([\\\\.:].*)?"}[1m])', '{{device}}.tx' + ||| + irate(node_network_transmit_errs{instance=~"$ceph_hosts([\\\\.:].*)?"}[1m]) or + 
irate(node_network_transmit_errs_total{instance=~"$ceph_hosts([\\\\.:].*)?"}[1m]) + |||, + '{{device}}.tx' )] ) .addSeriesOverride( @@ -475,7 +639,18 @@ local u = import 'utils.libsonnet'; 'connected', 'ops', 'Read (-) / Write (+)', - 'label_replace(\n (\n irate(node_disk_writes_completed{instance=~"($ceph_hosts)([\\\\.:].*)?"}[5m]) or\n irate(node_disk_writes_completed_total{instance=~"($ceph_hosts)([\\\\.:].*)?"}[5m])\n ),\n "instance",\n "$1",\n "instance",\n "([^:.]*).*"\n)\n* on(instance, device) group_left(ceph_daemon)\n label_replace(\n label_replace(\n ceph_disk_occupation_human,\n "device",\n "$1",\n "device",\n "/dev/(.*)"\n ),\n "instance",\n "$1",\n "instance",\n "([^:.]*).*"\n )', + ||| + label_replace( + ( + irate(node_disk_writes_completed{instance=~"($ceph_hosts)([\\\\.:].*)?"}[5m]) or + irate(node_disk_writes_completed_total{instance=~"($ceph_hosts)([\\\\.:].*)?"}[5m]) + ), "instance", "$1", "instance", "([^:.]*).*" + ) * on(instance, device) group_left(ceph_daemon) label_replace( + label_replace( + ceph_disk_occupation_human, "device", "$1", "device", "/dev/(.*)" + ), "instance", "$1", "instance", "([^:.]*).*" + ) + |||, '{{device}}({{ceph_daemon}}) writes', 0, 12, @@ -485,7 +660,18 @@ local u = import 'utils.libsonnet'; .addTargets( [ u.addTargetSchema( - 'label_replace(\n (irate(node_disk_reads_completed{instance=~"($ceph_hosts)([\\\\.:].*)?"}[5m]) or irate(node_disk_reads_completed_total{instance=~"($ceph_hosts)([\\\\.:].*)?"}[5m])),\n "instance",\n "$1",\n "instance",\n "([^:.]*).*"\n)\n* on(instance, device) group_left(ceph_daemon)\n label_replace(\n label_replace(\n ceph_disk_occupation_human,\n "device",\n "$1",\n "device",\n "/dev/(.*)"\n ),\n "instance",\n "$1",\n "instance",\n "([^:.]*).*"\n )', + ||| + label_replace( + ( + irate(node_disk_reads_completed{instance=~"($ceph_hosts)([\\\\.:].*)?"}[5m]) or + irate(node_disk_reads_completed_total{instance=~"($ceph_hosts)([\\\\.:].*)?"}[5m]) + ), "instance", "$1", "instance", "([^:.]*).*" + ) 
* on(instance, device) group_left(ceph_daemon) label_replace( + label_replace( + ceph_disk_occupation_human,"device", "$1", "device", "/dev/(.*)" + ), "instance", "$1", "instance", "([^:.]*).*" + ) + |||, '{{device}}({{ceph_daemon}}) reads' ), ] @@ -500,7 +686,17 @@ local u = import 'utils.libsonnet'; 'connected', 'Bps', 'Read (-) / Write (+)', - 'label_replace((irate(node_disk_bytes_written{instance=~"($ceph_hosts)([\\\\.:].*)?"}[5m]) or irate(node_disk_written_bytes_total{instance=~"($ceph_hosts)([\\\\.:].*)?"}[5m])), "instance", "$1", "instance", "([^:.]*).*") * on(instance, device) group_left(ceph_daemon) label_replace(label_replace(ceph_disk_occupation_human, "device", "$1", "device", "/dev/(.*)"), "instance", "$1", "instance", "([^:.]*).*")', + ||| + label_replace( + ( + irate(node_disk_bytes_written{instance=~"($ceph_hosts)([\\\\.:].*)?"}[5m]) or + irate(node_disk_written_bytes_total{instance=~"($ceph_hosts)([\\\\.:].*)?"}[5m]) + ), "instance", "$1", "instance", "([^:.]*).*") * on(instance, device) + group_left(ceph_daemon) label_replace( + label_replace(ceph_disk_occupation_human, "device", "$1", "device", "/dev/(.*)"), + "instance", "$1", "instance", "([^:.]*).*" + ) + |||, '{{device}}({{ceph_daemon}}) write', 12, 12, @@ -509,7 +705,18 @@ local u = import 'utils.libsonnet'; ) .addTargets( [u.addTargetSchema( - 'label_replace((irate(node_disk_bytes_read{instance=~"($ceph_hosts)([\\\\.:].*)?"}[5m]) or irate(node_disk_read_bytes_total{instance=~"($ceph_hosts)([\\\\.:].*)?"}[5m])), "instance", "$1", "instance", "([^:.]*).*") * on(instance, device) group_left(ceph_daemon) label_replace(label_replace(ceph_disk_occupation_human, "device", "$1", "device", "/dev/(.*)"), "instance", "$1", "instance", "([^:.]*).*")', + ||| + label_replace( + ( + irate(node_disk_bytes_read{instance=~"($ceph_hosts)([\\\\.:].*)?"}[5m]) or + irate(node_disk_read_bytes_total{instance=~"($ceph_hosts)([\\\\.:].*)?"}[5m]) + ), + "instance", "$1", "instance", "([^:.]*).*") * on(instance, 
device) + group_left(ceph_daemon) label_replace( + label_replace(ceph_disk_occupation_human, "device", "$1", "device", "/dev/(.*)"), + "instance", "$1", "instance", "([^:.]*).*" + ) + |||, '{{device}}({{ceph_daemon}}) read' )] ) @@ -523,7 +730,20 @@ local u = import 'utils.libsonnet'; 'null as zero', 's', '', - 'max by(instance,device) (label_replace((irate(node_disk_write_time_seconds_total{ instance=~"($ceph_hosts)([\\\\.:].*)?"}[5m]) ) / clamp_min(irate(node_disk_writes_completed_total{ instance=~"($ceph_hosts)([\\\\.:].*)?"}[5m]), 0.001) or (irate(node_disk_read_time_seconds_total{ instance=~"($ceph_hosts)([\\\\.:].*)?"}[5m]) ) / clamp_min(irate(node_disk_reads_completed_total{ instance=~"($ceph_hosts)([\\\\.:].*)?"}[5m]), 0.001), "instance", "$1", "instance", "([^:.]*).*")) * on(instance, device) group_left(ceph_daemon) label_replace(label_replace(ceph_disk_occupation_human{instance=~"($ceph_hosts)([\\\\.:].*)?"}, "device", "$1", "device", "/dev/(.*)"), "instance", "$1", "instance", "([^:.]*).*")', + ||| + max by(instance, device) (label_replace( + (irate(node_disk_write_time_seconds_total{instance=~"($ceph_hosts)([\\\\.:].*)?"}[5m])) / + clamp_min(irate(node_disk_writes_completed_total{instance=~"($ceph_hosts)([\\\\.:].*)?"}[5m]), 0.001) or + (irate(node_disk_read_time_seconds_total{instance=~"($ceph_hosts)([\\\\.:].*)?"}[5m])) / + clamp_min(irate(node_disk_reads_completed_total{instance=~"($ceph_hosts)([\\\\.:].*)?"}[5m]), 0.001), + "instance", "$1", "instance", "([^:.]*).*" + )) * on(instance, device) group_left(ceph_daemon) label_replace( + label_replace( + ceph_disk_occupation_human{instance=~"($ceph_hosts)([\\\\.:].*)?"}, + "device", "$1", "device", "/dev/(.*)" + ), "instance", "$1", "instance", "([^:.]*).*" + ) + |||, '{{device}}({{ceph_daemon}})', 0, 21, @@ -537,7 +757,17 @@ local u = import 'utils.libsonnet'; 'connected', 'percent', '%Util', - 'label_replace(((irate(node_disk_io_time_ms{instance=~"($ceph_hosts)([\\\\.:].*)?"}[5m]) / 10 ) or 
irate(node_disk_io_time_seconds_total{instance=~"($ceph_hosts)([\\\\.:].*)?"}[5m]) * 100), "instance", "$1", "instance", "([^:.]*).*") * on(instance, device) group_left(ceph_daemon) label_replace(label_replace(ceph_disk_occupation_human{instance=~"($ceph_hosts)([\\\\.:].*)?"}, "device", "$1", "device", "/dev/(.*)"), "instance", "$1", "instance", "([^:.]*).*")', + ||| + label_replace( + ( + (irate(node_disk_io_time_ms{instance=~"($ceph_hosts)([\\\\.:].*)?"}[5m]) / 10) or + irate(node_disk_io_time_seconds_total{instance=~"($ceph_hosts)([\\\\.:].*)?"}[5m]) * 100 + ), "instance", "$1", "instance", "([^:.]*).*" + ) * on(instance, device) group_left(ceph_daemon) label_replace( + label_replace(ceph_disk_occupation_human{instance=~"($ceph_hosts)([\\\\.:].*)?"}, + "device", "$1", "device", "/dev/(.*)"), "instance", "$1", "instance", "([^:.]*).*" + ) + |||, '{{device}}({{ceph_daemon}})', 12, 21, diff --git a/monitoring/ceph-mixin/dashboards/osd.libsonnet b/monitoring/ceph-mixin/dashboards/osd.libsonnet index c5e614675e3f2..4c0623493abed 100644 --- a/monitoring/ceph-mixin/dashboards/osd.libsonnet +++ b/monitoring/ceph-mixin/dashboards/osd.libsonnet @@ -135,11 +135,25 @@ local u = import 'utils.libsonnet'; .addTargets( [ u.addTargetSchema( - 'max (irate(ceph_osd_op_r_latency_sum[1m]) / on (ceph_daemon) irate(ceph_osd_op_r_latency_count[1m]) * 1000)', + ||| + max( + irate(ceph_osd_op_r_latency_sum[1m]) / + on (ceph_daemon) irate(ceph_osd_op_r_latency_count[1m]) * 1000 + ) + |||, 'MAX read' ), u.addTargetSchema( - 'quantile(0.95,\n (irate(ceph_osd_op_r_latency_sum[1m]) / on (ceph_daemon) irate(ceph_osd_op_r_latency_count[1m]) * 1000)\n)', '@95%ile' + ||| + quantile(0.95, + ( + irate(ceph_osd_op_r_latency_sum[1m]) / + on (ceph_daemon) irate(ceph_osd_op_r_latency_count[1m]) + * 1000 + ) + ) + |||, + '@95%ile' ), ], ), @@ -157,7 +171,21 @@ local u = import 'utils.libsonnet'; ) .addTarget( u.addTargetSchema( - 'topk(10,\n (sort(\n (irate(ceph_osd_op_r_latency_sum[1m]) / on 
(ceph_daemon) irate(ceph_osd_op_r_latency_count[1m]) * 1000)\n ))\n)\n\n', '', 'table', 1, true + ||| + topk(10, + (sort( + ( + irate(ceph_osd_op_r_latency_sum[1m]) / + on (ceph_daemon) irate(ceph_osd_op_r_latency_count[1m]) * + 1000 + ) + )) + ) + |||, + '', + 'table', + 1, + true ) ) + { gridPos: { x: 8, y: 0, w: 4, h: 8 } }, OsdOverviewGraphPanel( @@ -169,7 +197,13 @@ local u = import 'utils.libsonnet'; 'ms', null, '0', - 'avg (irate(ceph_osd_op_w_latency_sum[1m]) / on (ceph_daemon) irate(ceph_osd_op_w_latency_count[1m]) * 1000)', + ||| + avg( + irate(ceph_osd_op_w_latency_sum[1m]) / + on (ceph_daemon) irate(ceph_osd_op_w_latency_count[1m]) + * 1000 + ) + |||, 'AVG write', 12, 0, @@ -179,11 +213,22 @@ local u = import 'utils.libsonnet'; .addTargets( [ u.addTargetSchema( - 'max (irate(ceph_osd_op_w_latency_sum[1m]) / on (ceph_daemon) irate(ceph_osd_op_w_latency_count[1m]) * 1000)', - 'MAX write' + ||| + max( + irate(ceph_osd_op_w_latency_sum[1m]) / + on (ceph_daemon) irate(ceph_osd_op_w_latency_count[1m]) * + 1000 + ) + |||, 'MAX write' ), u.addTargetSchema( - 'quantile(0.95,\n (irate(ceph_osd_op_w_latency_sum[1m]) / on (ceph_daemon) irate(ceph_osd_op_w_latency_count[1m]) * 1000)\n)', '@95%ile write' + ||| + quantile(0.95, ( + irate(ceph_osd_op_w_latency_sum[1m]) / + on (ceph_daemon) irate(ceph_osd_op_w_latency_count[1m]) * + 1000 + )) + |||, '@95%ile write' ), ], ), @@ -203,7 +248,15 @@ local u = import 'utils.libsonnet'; ) .addTarget( u.addTargetSchema( - 'topk(10,\n (sort(\n (irate(ceph_osd_op_w_latency_sum[1m]) / on (ceph_daemon) irate(ceph_osd_op_w_latency_count[1m]) * 1000)\n ))\n)\n\n', + ||| + topk(10, + (sort( + (irate(ceph_osd_op_w_latency_sum[1m]) / + on (ceph_daemon) irate(ceph_osd_op_w_latency_count[1m]) * + 1000) + )) + ) + |||, '', 'table', 1, @@ -271,7 +324,7 @@ local u = import 'utils.libsonnet'; min='0', nullPointMode='null') .addTarget(u.addTargetSchema( - 'ceph_osd_numpg\n', 'PGs per OSD', 'time_series', 1, true + 'ceph_osd_numpg', 'PGs per 
OSD', 'time_series', 1, true )) + { gridPos: { x: 12, y: 8, w: 8, h: 8 } }, OsdOverviewSingleStatPanel( ['#d44a3a', '#299c46'], @@ -284,7 +337,12 @@ local u = import 'utils.libsonnet'; true, false, '.75', - 'sum(ceph_bluestore_onode_hits)/(sum(ceph_bluestore_onode_hits) + sum(ceph_bluestore_onode_misses))', + ||| + sum(ceph_bluestore_onode_hits) / ( + sum(ceph_bluestore_onode_hits) + + sum(ceph_bluestore_onode_misses) + ) + |||, 20, 8, 4, @@ -400,8 +458,14 @@ local u = import 'utils.libsonnet'; '', 's', 'Read (-) / Write (+)', - 'irate(ceph_osd_op_r_latency_sum{ceph_daemon=~"$osd"}[1m]) / on (ceph_daemon) irate(ceph_osd_op_r_latency_count[1m])', - 'irate(ceph_osd_op_w_latency_sum{ceph_daemon=~"$osd"}[1m]) / on (ceph_daemon) irate(ceph_osd_op_w_latency_count[1m])', + ||| + irate(ceph_osd_op_r_latency_sum{ceph_daemon=~"$osd"}[1m]) / + on (ceph_daemon) irate(ceph_osd_op_r_latency_count[1m]) + |||, + ||| + irate(ceph_osd_op_w_latency_sum{ceph_daemon=~"$osd"}[1m]) / + on (ceph_daemon) irate(ceph_osd_op_w_latency_count[1m]) + |||, 'read', 'write', 0, @@ -455,8 +519,31 @@ local u = import 'utils.libsonnet'; '', 's', 'Read (-) / Write (+)', - '(label_replace(irate(node_disk_read_time_seconds_total[1m]) / irate(node_disk_reads_completed_total[1m]), "instance", "$1", "instance", "([^:.]*).*") and on (instance, device) label_replace(label_replace(ceph_disk_occupation_human{ceph_daemon=~"$osd"}, "device", "$1", "device", "/dev/(.*)"), "instance", "$1", "instance", "([^:.]*).*"))', - '(label_replace(irate(node_disk_write_time_seconds_total[1m]) / irate(node_disk_writes_completed_total[1m]), "instance", "$1", "instance", "([^:.]*).*") and on (instance, device) label_replace(label_replace(ceph_disk_occupation_human{ceph_daemon=~"$osd"}, "device", "$1", "device", "/dev/(.*)"), "instance", "$1", "instance", "([^:.]*).*"))', + ||| + ( + label_replace( + irate(node_disk_read_time_seconds_total[1m]) / irate(node_disk_reads_completed_total[1m]), + "instance", "$1", "instance", 
"([^:.]*).*" + ) and on (instance, device) label_replace( + label_replace( + ceph_disk_occupation_human{ceph_daemon=~"$osd"}, + "device", "$1", "device", "/dev/(.*)" + ), "instance", "$1", "instance", "([^:.]*).*" + ) + ) + |||, + ||| + ( + label_replace( + irate(node_disk_write_time_seconds_total[1m]) / irate(node_disk_writes_completed_total[1m]), + "instance", "$1", "instance", "([^:.]*).*") and on (instance, device) + label_replace( + label_replace( + ceph_disk_occupation_human{ceph_daemon=~"$osd"}, "device", "$1", "device", "/dev/(.*)" + ), "instance", "$1", "instance", "([^:.]*).*" + ) + ) + |||, '{{instance}}/{{device}} Reads', '{{instance}}/{{device}} Writes', 0, @@ -472,8 +559,28 @@ local u = import 'utils.libsonnet'; '', 'short', 'Read (-) / Write (+)', - 'label_replace(irate(node_disk_writes_completed_total[1m]), "instance", "$1", "instance", "([^:.]*).*") and on (instance, device) label_replace(label_replace(ceph_disk_occupation_human{ceph_daemon=~"$osd"}, "device", "$1", "device", "/dev/(.*)"), "instance", "$1", "instance", "([^:.]*).*")', - 'label_replace(irate(node_disk_reads_completed_total[1m]), "instance", "$1", "instance", "([^:.]*).*") and on (instance, device) label_replace(label_replace(ceph_disk_occupation_human{ceph_daemon=~"$osd"}, "device", "$1", "device", "/dev/(.*)"), "instance", "$1", "instance", "([^:.]*).*")', + ||| + label_replace( + irate(node_disk_writes_completed_total[1m]), + "instance", "$1", "instance", "([^:.]*).*" + ) and on (instance, device) label_replace( + label_replace( + ceph_disk_occupation_human{ceph_daemon=~"$osd"}, + "device", "$1", "device", "/dev/(.*)" + ), "instance", "$1", "instance", "([^:.]*).*" + ) + |||, + ||| + label_replace( + irate(node_disk_reads_completed_total[1m]), + "instance", "$1", "instance", "([^:.]*).*" + ) and on (instance, device) label_replace( + label_replace( + ceph_disk_occupation_human{ceph_daemon=~"$osd"}, + "device", "$1", "device", "/dev/(.*)" + ), "instance", "$1", "instance", 
"([^:.]*).*" + ) + |||, '{{device}} on {{instance}} Writes', '{{device}} on {{instance}} Reads', 6, @@ -489,8 +596,26 @@ local u = import 'utils.libsonnet'; '', 'Bps', 'Read (-) / Write (+)', - 'label_replace(irate(node_disk_read_bytes_total[1m]), "instance", "$1", "instance", "([^:.]*).*") and on (instance, device) label_replace(label_replace(ceph_disk_occupation_human{ceph_daemon=~"$osd"}, "device", "$1", "device", "/dev/(.*)"), "instance", "$1", "instance", "([^:.]*).*")', - 'label_replace(irate(node_disk_written_bytes_total[1m]), "instance", "$1", "instance", "([^:.]*).*") and on (instance, device) label_replace(label_replace(ceph_disk_occupation_human{ceph_daemon=~"$osd"}, "device", "$1", "device", "/dev/(.*)"), "instance", "$1", "instance", "([^:.]*).*")', + ||| + label_replace( + irate(node_disk_read_bytes_total[1m]), "instance", "$1", "instance", "([^:.]*).*" + ) and on (instance, device) label_replace( + label_replace( + ceph_disk_occupation_human{ceph_daemon=~"$osd"}, + "device", "$1", "device", "/dev/(.*)" + ), "instance", "$1", "instance", "([^:.]*).*" + ) + |||, + ||| + label_replace( + irate(node_disk_written_bytes_total[1m]), "instance", "$1", "instance", "([^:.]*).*" + ) and on (instance, device) label_replace( + label_replace( + ceph_disk_occupation_human{ceph_daemon=~"$osd"}, + "device", "$1", "device", "/dev/(.*)" + ), "instance", "$1", "instance", "([^:.]*).*" + ) + |||, '{{instance}} {{device}} Reads', '{{instance}} {{device}} Writes', 12, @@ -516,7 +641,16 @@ local u = import 'utils.libsonnet'; '$datasource' ) .addTarget(u.addTargetSchema( - 'label_replace(irate(node_disk_io_time_seconds_total[1m]), "instance", "$1", "instance", "([^:.]*).*") and on (instance, device) label_replace(label_replace(ceph_disk_occupation_human{ceph_daemon=~"$osd"}, "device", "$1", "device", "/dev/(.*)"), "instance", "$1", "instance", "([^:.]*).*")', + ||| + label_replace( + irate(node_disk_io_time_seconds_total[1m]), + "instance", "$1", "instance", "([^:.]*).*" + ) 
and on (instance, device) label_replace( + label_replace( + ceph_disk_occupation_human{ceph_daemon=~"$osd"}, "device", "$1", "device", "/dev/(.*)" + ), "instance", "$1", "instance", "([^:.]*).*" + ) + |||, '{{device}} on {{instance}}' )) + { gridPos: { x: 18, y: 11, w: 6, h: 9 } }, ]), diff --git a/monitoring/ceph-mixin/dashboards/pool.libsonnet b/monitoring/ceph-mixin/dashboards/pool.libsonnet index 8fb4f815c745a..12f7c6789ec28 100644 --- a/monitoring/ceph-mixin/dashboards/pool.libsonnet +++ b/monitoring/ceph-mixin/dashboards/pool.libsonnet @@ -87,7 +87,10 @@ local u = import 'utils.libsonnet'; 22, [], '', - { refresh_intervals: ['5s', '10s', '15s', '30s', '1m', '5m', '15m', '30m', '1h', '2h', '1d'], time_options: ['5m', '15m', '1h', '6h', '12h', '24h', '2d', '7d', '30d'] } + { + refresh_intervals: ['5s', '10s', '15s', '30s', '1m', '5m', '15m', '30m', '1h', '2h', '1d'], + time_options: ['5m', '15m', '1h', '6h', '12h', '24h', '2d', '7d', '30d'], + } ) .addAnnotation( u.addAnnotationSchema( @@ -194,9 +197,14 @@ local u = import 'utils.libsonnet'; PoolOverviewSingleStatPanel( 'percent', 'Compression Eligibility', - 'Indicates how suitable the data is within the pools that are/have been enabled for compression - averaged across all pools holding compressed data\n', + 'Indicates how suitable the data is within the pools that are/have been enabled for compression - averaged across all pools holding compressed data', 'current', - '(sum(ceph_pool_compress_under_bytes > 0) / sum(ceph_pool_stored_raw and ceph_pool_compress_under_bytes > 0)) * 100', + ||| + ( + sum(ceph_pool_compress_under_bytes > 0) / + sum(ceph_pool_stored_raw and ceph_pool_compress_under_bytes > 0) + ) * 100 + |||, null, 'table', 18, @@ -249,7 +257,11 @@ local u = import 'utils.libsonnet'; .addTargets( [ u.addTargetSchema( - '(ceph_pool_compress_under_bytes / ceph_pool_compress_bytes_used > 0) and on(pool_id) (((ceph_pool_compress_under_bytes > 0) / ceph_pool_stored_raw) * 100 > 0.5)', + ||| + 
(ceph_pool_compress_under_bytes / ceph_pool_compress_bytes_used > 0) and on(pool_id) ( + ((ceph_pool_compress_under_bytes > 0) / ceph_pool_stored_raw) * 100 > 0.5 + ) + |||, 'A', 'table', 1, @@ -321,7 +333,13 @@ local u = import 'utils.libsonnet'; 'This chart shows the sum of read and write IOPS from all clients by pool', 'short', 'IOPS', - 'topk($topk,round((rate(ceph_pool_rd[30s]) + rate(ceph_pool_wr[30s])),1) * on(pool_id) group_left(instance,name) ceph_pool_metadata) ', + ||| + topk($topk, + round( + (rate(ceph_pool_rd[30s]) + rate(ceph_pool_wr[30s])), + 1 + ) * on(pool_id) group_left(instance,name) ceph_pool_metadata) + |||, '{{name}} ', 0, 9, @@ -330,7 +348,12 @@ local u = import 'utils.libsonnet'; ) .addTarget( u.addTargetSchema( - 'topk($topk,rate(ceph_pool_wr[30s]) + on(pool_id) group_left(instance,name) ceph_pool_metadata) ', + ||| + topk($topk, + rate(ceph_pool_wr[30s]) + + on(pool_id) group_left(instance,name) ceph_pool_metadata + ) + |||, '{{name}} - write' ) ), @@ -339,7 +362,12 @@ local u = import 'utils.libsonnet'; 'The chart shows the sum of read and write bytes from all clients, by pool', 'Bps', 'Throughput', - 'topk($topk,(rate(ceph_pool_rd_bytes[30s]) + rate(ceph_pool_wr_bytes[30s])) * on(pool_id) group_left(instance,name) ceph_pool_metadata)', + ||| + topk($topk, + (rate(ceph_pool_rd_bytes[30s]) + rate(ceph_pool_wr_bytes[30s])) * + on(pool_id) group_left(instance, name) ceph_pool_metadata + ) + |||, '{{name}}', 12, 9, @@ -476,7 +504,10 @@ local u = import 'utils.libsonnet'; true, true, '.7,.8', - '(ceph_pool_stored / (ceph_pool_stored + ceph_pool_max_avail)) * on(pool_id) group_left(instance,name) ceph_pool_metadata{name=~"$pool_name"}', + ||| + (ceph_pool_stored / (ceph_pool_stored + ceph_pool_max_avail)) * + on(pool_id) group_left(instance,name) ceph_pool_metadata{name=~"$pool_name"} + |||, 'time_series', 0, 0, @@ -493,7 +524,10 @@ local u = import 'utils.libsonnet'; false, '', 'current', - '(ceph_pool_max_avail / deriv(ceph_pool_stored[6h])) 
* on(pool_id) group_left(instance,name) ceph_pool_metadata{name=~"$pool_name"} > 0', + ||| + (ceph_pool_max_avail / deriv(ceph_pool_stored[6h])) * + on(pool_id) group_left(instance,name) ceph_pool_metadata{name=~"$pool_name"} > 0 + |||, 'time_series', 7, 0, @@ -510,7 +544,10 @@ local u = import 'utils.libsonnet'; '', 'ops', 'Objects out(-) / in(+) ', - 'deriv(ceph_pool_objects[1m]) * on(pool_id) group_left(instance,name) ceph_pool_metadata{name=~"$pool_name"}', + ||| + deriv(ceph_pool_objects[1m]) * + on(pool_id) group_left(instance,name) ceph_pool_metadata{name=~"$pool_name"} + |||, 'Objects per second', 12, 0, @@ -521,12 +558,29 @@ local u = import 'utils.libsonnet'; { read_op_per_sec: '#3F6833', write_op_per_sec: '#E5AC0E', - }, '$pool_name Client IOPS', '', 'iops', 'Read (-) / Write (+)', 'irate(ceph_pool_rd[1m]) * on(pool_id) group_left(instance,name) ceph_pool_metadata{name=~"$pool_name"}', 'reads', 0, 7, 12, 7 + }, + '$pool_name Client IOPS', + '', + 'iops', + 'Read (-) / Write (+)', + ||| + irate(ceph_pool_rd[1m]) * + on(pool_id) group_left(instance,name) ceph_pool_metadata{name=~"$pool_name"} + |||, + 'reads', + 0, + 7, + 12, + 7 ) .addSeriesOverride({ alias: 'reads', transform: 'negative-Y' }) .addTarget( u.addTargetSchema( - 'irate(ceph_pool_wr[1m]) * on(pool_id) group_left(instance,name) ceph_pool_metadata{name=~"$pool_name"}', 'writes' + ||| + irate(ceph_pool_wr[1m]) * + on(pool_id) group_left(instance,name) ceph_pool_metadata{name=~"$pool_name"} + |||, + 'writes' ) ), PoolDetailGraphPanel( @@ -538,7 +592,10 @@ local u = import 'utils.libsonnet'; '', 'Bps', 'Read (-) / Write (+)', - 'irate(ceph_pool_rd_bytes[1m]) + on(pool_id) group_left(instance,name) ceph_pool_metadata{name=~"$pool_name"}', + ||| + irate(ceph_pool_rd_bytes[1m]) + + on(pool_id) group_left(instance, name) ceph_pool_metadata{name=~"$pool_name"} + |||, 'reads', 12, 7, @@ -548,7 +605,10 @@ local u = import 'utils.libsonnet'; .addSeriesOverride({ alias: 'reads', transform: 'negative-Y' }) 
.addTarget( u.addTargetSchema( - 'irate(ceph_pool_wr_bytes[1m]) + on(pool_id) group_left(instance,name) ceph_pool_metadata{name=~"$pool_name"}', + ||| + irate(ceph_pool_wr_bytes[1m]) + + on(pool_id) group_left(instance,name) ceph_pool_metadata{name=~"$pool_name"} + |||, 'writes' ) ), @@ -561,7 +621,10 @@ local u = import 'utils.libsonnet'; '', 'short', 'Objects', - 'ceph_pool_objects * on(pool_id) group_left(instance,name) ceph_pool_metadata{name=~"$pool_name"}', + ||| + ceph_pool_objects * + on(pool_id) group_left(instance,name) ceph_pool_metadata{name=~"$pool_name"} + |||, 'Number of Objects', 0, 14, diff --git a/monitoring/ceph-mixin/dashboards/rbd.libsonnet b/monitoring/ceph-mixin/dashboards/rbd.libsonnet index a4ca6982d3bb7..88e2568ecedfd 100644 --- a/monitoring/ceph-mixin/dashboards/rbd.libsonnet +++ b/monitoring/ceph-mixin/dashboards/rbd.libsonnet @@ -103,8 +103,14 @@ local u = import 'utils.libsonnet'; RbdDetailsPanel( 'Average Latency', 'ns', - 'irate(ceph_rbd_write_latency_sum{pool="$Pool", image="$Image"}[30s]) / irate(ceph_rbd_write_latency_count{pool="$Pool", image="$Image"}[30s])', - 'irate(ceph_rbd_read_latency_sum{pool="$Pool", image="$Image"}[30s]) / irate(ceph_rbd_read_latency_count{pool="$Pool", image="$Image"}[30s])', + ||| + irate(ceph_rbd_write_latency_sum{pool="$Pool", image="$Image"}[30s]) / + irate(ceph_rbd_write_latency_count{pool="$Pool", image="$Image"}[30s]) + |||, + ||| + irate(ceph_rbd_read_latency_sum{pool="$Pool", image="$Image"}[30s]) / + irate(ceph_rbd_read_latency_count{pool="$Pool", image="$Image"}[30s]) + |||, 16, 0, 8, @@ -226,8 +232,18 @@ local u = import 'utils.libsonnet'; RbdOverviewPanel( 'Average Latency', 'ns', - 'round(sum(irate(ceph_rbd_write_latency_sum[30s])) / sum(irate(ceph_rbd_write_latency_count[30s])))', - 'round(sum(irate(ceph_rbd_read_latency_sum[30s])) / sum(irate(ceph_rbd_read_latency_count[30s])))', + ||| + round( + sum(irate(ceph_rbd_write_latency_sum[30s])) / + 
sum(irate(ceph_rbd_write_latency_count[30s])) + ) + |||, + ||| + round( + sum(irate(ceph_rbd_read_latency_sum[30s])) / + sum(irate(ceph_rbd_read_latency_count[30s])) + ) + |||, 'Write', 'Read', 16, @@ -250,7 +266,16 @@ local u = import 'utils.libsonnet'; ) .addTarget( u.addTargetSchema( - 'topk(10, (sort((irate(ceph_rbd_write_ops[30s]) + on (image, pool, namespace) irate(ceph_rbd_read_ops[30s])))))', + ||| + topk(10, + ( + sort(( + irate(ceph_rbd_write_ops[30s]) + + on (image, pool, namespace) irate(ceph_rbd_read_ops[30s]) + )) + ) + ) + |||, '', 'table', 1, @@ -272,7 +297,15 @@ local u = import 'utils.libsonnet'; ) .addTarget( u.addTargetSchema( - 'topk(10, sort(sum(irate(ceph_rbd_read_bytes[30s]) + irate(ceph_rbd_write_bytes[30s])) by (pool, image, namespace)))', + ||| + topk(10, + sort( + sum( + irate(ceph_rbd_read_bytes[30s]) + irate(ceph_rbd_write_bytes[30s]) + ) by (pool, image, namespace) + ) + ) + |||, '', 'table', 1, @@ -294,7 +327,14 @@ local u = import 'utils.libsonnet'; ) .addTarget( u.addTargetSchema( - 'topk(10,\n sum(\n irate(ceph_rbd_write_latency_sum[30s]) / clamp_min(irate(ceph_rbd_write_latency_count[30s]), 1) +\n irate(ceph_rbd_read_latency_sum[30s]) / clamp_min(irate(ceph_rbd_read_latency_count[30s]), 1)\n ) by (pool, image, namespace)\n)', + ||| + topk(10, + sum( + irate(ceph_rbd_write_latency_sum[30s]) / clamp_min(irate(ceph_rbd_write_latency_count[30s]), 1) + + irate(ceph_rbd_read_latency_sum[30s]) / clamp_min(irate(ceph_rbd_read_latency_count[30s]), 1) + ) by (pool, image, namespace) + ) + |||, '', 'table', 1, diff --git a/monitoring/ceph-mixin/dashboards/rgw.libsonnet b/monitoring/ceph-mixin/dashboards/rgw.libsonnet index f7f76187f1427..9c3333ebde739 100644 --- a/monitoring/ceph-mixin/dashboards/rgw.libsonnet +++ b/monitoring/ceph-mixin/dashboards/rgw.libsonnet @@ -230,7 +230,11 @@ local u = import 'utils.libsonnet'; '', 's', 'short', - 'rate(ceph_rgw_get_initial_lat_sum[30s]) / rate(ceph_rgw_get_initial_lat_count[30s]) * on (instance_id) 
group_left (ceph_daemon) ceph_rgw_metadata', + ||| + rate(ceph_rgw_get_initial_lat_sum[30s]) / + rate(ceph_rgw_get_initial_lat_count[30s]) * + on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata + |||, 'GET AVG', 0, 1, @@ -239,7 +243,11 @@ local u = import 'utils.libsonnet'; ).addTargets( [ u.addTargetSchema( - 'rate(ceph_rgw_put_initial_lat_sum[30s]) / rate(ceph_rgw_put_initial_lat_count[30s]) * on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata', + ||| + rate(ceph_rgw_put_initial_lat_sum[30s]) / + rate(ceph_rgw_put_initial_lat_count[30s]) * + on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata + |||, 'PUT AVG' ), ] @@ -249,7 +257,15 @@ local u = import 'utils.libsonnet'; '', 'none', 'short', - 'sum by (rgw_host) (label_replace(rate(ceph_rgw_req[30s]) * on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata, "rgw_host", "$1", "ceph_daemon", "rgw.(.*)"))', + ||| + sum by (rgw_host) ( + label_replace( + rate(ceph_rgw_req[30s]) * + on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata, + "rgw_host", "$1", "ceph_daemon", "rgw.(.*)" + ) + ) + |||, '{{rgw_host}}', 8, 1, @@ -261,7 +277,14 @@ local u = import 'utils.libsonnet'; 'Latencies are shown stacked, without a yaxis to provide a visual indication of GET latency imbalance across RGW hosts', 's', 'short', - 'label_replace(\n rate(ceph_rgw_get_initial_lat_sum[30s]) /\n rate(ceph_rgw_get_initial_lat_count[30s]) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata,\n"rgw_host", "$1", "ceph_daemon", "rgw.(.*)")', + ||| + label_replace( + rate(ceph_rgw_get_initial_lat_sum[30s]) / + rate(ceph_rgw_get_initial_lat_count[30s]) * + on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata, + "rgw_host", "$1", "ceph_daemon", "rgw.(.*)" + ) + |||, '{{rgw_host}}', 15, 1, @@ -288,7 +311,14 @@ local u = import 'utils.libsonnet'; 'Total bytes transferred in/out through get/put operations, by radosgw instance', 'bytes', 'short', - 'label_replace(sum by (instance_id) (\n 
rate(ceph_rgw_get_b[30s]) + \n rate(ceph_rgw_put_b[30s])\n) * on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata, "rgw_host", "$1", "ceph_daemon", "rgw.(.*)")', + ||| + label_replace(sum by (instance_id) ( + rate(ceph_rgw_get_b[30s]) + + rate(ceph_rgw_put_b[30s])) * + on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata, + "rgw_host", "$1", "ceph_daemon", "rgw.(.*)" + ) + |||, '{{rgw_host}}', 8, 8, @@ -300,7 +330,14 @@ local u = import 'utils.libsonnet'; 'Latencies are shown stacked, without a yaxis to provide a visual indication of PUT latency imbalance across RGW hosts', 's', 'short', - 'label_replace(\n rate(ceph_rgw_put_initial_lat_sum[30s]) /\n rate(ceph_rgw_put_initial_lat_count[30s]) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata,\n"rgw_host", "$1", "ceph_daemon", "rgw.(.*)")', + ||| + label_replace( + rate(ceph_rgw_put_initial_lat_sum[30s]) / + rate(ceph_rgw_put_initial_lat_count[30s]) * + on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata, + "rgw_host", "$1", "ceph_daemon", "rgw.(.*)" + ) + |||, '{{rgw_host}}', 15, 8, @@ -315,7 +352,13 @@ local u = import 'utils.libsonnet'; '', 'short', 'short', - 'sum(irate(haproxy_frontend_http_responses_total{code=~"$code",instance=~"$ingress_service",proxy=~"frontend"}[5m])) by (code)', + ||| + sum( + irate( + haproxy_frontend_http_responses_total{code=~"$code", instance=~"$ingress_service", proxy=~"frontend"}[5m] + ) + ) by (code) + |||, 'Frontend {{ code }}', 0, 12, @@ -330,7 +373,17 @@ local u = import 'utils.libsonnet'; true ) .addTargets( - [u.addTargetSchema('sum(irate(haproxy_backend_http_responses_total{code=~"$code",instance=~"$ingress_service",proxy=~"backend"}[5m])) by (code)', 'Backend {{ code }}')] + [ + u.addTargetSchema( + ||| + sum( + irate( + haproxy_backend_http_responses_total{code=~"$code", instance=~"$ingress_service", proxy=~"backend"}[5m] + ) + ) by (code) + |||, 'Backend {{ code }}' + ), + ] ) .addSeriesOverride([ { @@ -349,7 +402,13 @@ local u = import 
'utils.libsonnet'; '', 'short', 'short', - 'sum(irate(haproxy_frontend_http_requests_total{proxy=~"frontend",instance=~"$ingress_service"}[5m])) by (instance)', + ||| + sum( + irate( + haproxy_frontend_http_requests_total{proxy=~"frontend", instance=~"$ingress_service"}[5m] + ) + ) by (instance) + |||, 'Requests', 5, 12, @@ -365,12 +424,58 @@ local u = import 'utils.libsonnet'; ) .addTargets( [ - u.addTargetSchema('sum(irate(haproxy_backend_response_errors_total{proxy=~"backend",instance=~"$ingress_service"}[5m])) by (instance)', 'Response errors', 'time_series', 2), - u.addTargetSchema('sum(irate(haproxy_frontend_request_errors_total{proxy=~"frontend",instance=~"$ingress_service"}[5m])) by (instance)', 'Requests errors'), - u.addTargetSchema('sum(irate(haproxy_backend_redispatch_warnings_total{proxy=~"backend",instance=~"$ingress_service"}[5m])) by (instance)', 'Backend redispatch', 'time_series', 2), - u.addTargetSchema('sum(irate(haproxy_backend_retry_warnings_total{proxy=~"backend",instance=~"$ingress_service"}[5m])) by (instance)', 'Backend retry', 'time_series', 2), - u.addTargetSchema('sum(irate(haproxy_frontend_requests_denied_total{proxy=~"frontend",instance=~"$ingress_service"}[5m])) by (instance)', 'Request denied', 'time_series', 2), - u.addTargetSchema('sum(haproxy_backend_current_queue{proxy=~"backend",instance=~"$ingress_service"}) by (instance)', 'Backend Queued', 'time_series', 2), + u.addTargetSchema( + ||| + sum( + irate( + haproxy_backend_response_errors_total{proxy=~"backend",instance=~"$ingress_service"}[5m] + ) + ) by (instance) + |||, 'Response errors', 'time_series', 2 + ), + u.addTargetSchema( + ||| + sum( + irate( + haproxy_frontend_request_errors_total{proxy=~"frontend", instance=~"$ingress_service"}[5m] + ) + ) by (instance) + |||, 'Requests errors' + ), + u.addTargetSchema( + ||| + sum( + irate( + haproxy_backend_redispatch_warnings_total{proxy=~"backend", instance=~"$ingress_service"}[5m] + ) + ) by (instance) + |||, 'Backend 
redispatch', 'time_series', 2 + ), + u.addTargetSchema( + ||| + sum( + irate( + haproxy_backend_retry_warnings_total{proxy=~"backend", instance=~"$ingress_service"}[5m] + ) + ) by (instance) + |||, 'Backend retry', 'time_series', 2 + ), + u.addTargetSchema( + ||| + sum( + irate( + haproxy_frontend_requests_denied_total{proxy=~"frontend", instance=~"$ingress_service"}[5m] + ) + ) by (instance) + |||, 'Request denied', 'time_series', 2 + ), + u.addTargetSchema( + ||| + sum( + haproxy_backend_current_queue{proxy=~"backend", instance=~"$ingress_service"} + ) by (instance) + |||, 'Backend Queued', 'time_series', 2 + ), ] ) .addSeriesOverride([ @@ -388,7 +493,13 @@ local u = import 'utils.libsonnet'; '', 'short', 'short', - 'sum(irate(haproxy_frontend_connections_total{proxy=~"frontend",instance=~"$ingress_service"}[5m])) by (instance)', + ||| + sum( + irate( + haproxy_frontend_connections_total{proxy=~"frontend", instance=~"$ingress_service"}[5m] + ) + ) by (instance) + |||, 'Front', 10, 12, @@ -404,8 +515,24 @@ local u = import 'utils.libsonnet'; ) .addTargets( [ - u.addTargetSchema('sum(irate(haproxy_backend_connection_attempts_total{proxy=~"backend",instance=~"$ingress_service"}[5m])) by (instance)', 'Back'), - u.addTargetSchema('sum(irate(haproxy_backend_connection_errors_total{proxy=~"backend",instance=~"$ingress_service"}[5m])) by (instance)', 'Back errors'), + u.addTargetSchema( + ||| + sum( + irate( + haproxy_backend_connection_attempts_total{proxy=~"backend", instance=~"$ingress_service"}[5m] + ) + ) by (instance) + |||, 'Back' + ), + u.addTargetSchema( + ||| + sum( + irate( + haproxy_backend_connection_errors_total{proxy=~"backend", instance=~"$ingress_service"}[5m] + ) + ) by (instance) + |||, 'Back errors' + ), ] ) .addSeriesOverride([ @@ -419,7 +546,13 @@ local u = import 'utils.libsonnet'; '', 'short', 'short', - 'sum(irate(haproxy_frontend_bytes_in_total{proxy=~"frontend",instance=~"$ingress_service"}[5m])*8) by (instance)', + ||| + sum( + irate( + 
haproxy_frontend_bytes_in_total{proxy=~"frontend", instance=~"$ingress_service"}[5m] + ) * 8 + ) by (instance) + |||, 'IN Front', 15, 12, @@ -435,9 +568,33 @@ local u = import 'utils.libsonnet'; ) .addTargets( [ - u.addTargetSchema('sum(irate(haproxy_frontend_bytes_out_total{proxy=~"frontend",instance=~"$ingress_service"}[5m])*8) by (instance)', 'OUT Front', 'time_series', 2), - u.addTargetSchema('sum(irate(haproxy_backend_bytes_in_total{proxy=~"backend",instance=~"$ingress_service"}[5m])*8) by (instance)', 'IN Back', 'time_series', 2), - u.addTargetSchema('sum(irate(haproxy_backend_bytes_out_total{proxy=~"backend",instance=~"$ingress_service"}[5m])*8) by (instance)', 'OUT Back', 'time_series', 2), + u.addTargetSchema( + ||| + sum( + irate( + haproxy_frontend_bytes_out_total{proxy=~"frontend", instance=~"$ingress_service"}[5m] + ) * 8 + ) by (instance) + |||, 'OUT Front', 'time_series', 2 + ), + u.addTargetSchema( + ||| + sum( + irate( + haproxy_backend_bytes_in_total{proxy=~"backend", instance=~"$ingress_service"}[5m] + ) * 8 + ) by (instance) + |||, 'IN Back', 'time_series', 2 + ), + u.addTargetSchema( + ||| + sum( + irate( + haproxy_backend_bytes_out_total{proxy=~"backend", instance=~"$ingress_service"}[5m] + ) * 8 + ) by (instance) + |||, 'OUT Back', 'time_series', 2 + ), ] ) .addSeriesOverride([ @@ -538,8 +695,16 @@ local u = import 'utils.libsonnet'; '', 's', 'short', - 'sum by (instance_id) (rate(ceph_rgw_get_initial_lat_sum[30s]) / rate(ceph_rgw_get_initial_lat_count[30s])) * on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"}', - 'sum by (instance_id) (rate(ceph_rgw_put_initial_lat_sum[30s]) / rate(ceph_rgw_put_initial_lat_count[30s])) * on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"}', + ||| + sum by (instance_id) ( + rate(ceph_rgw_get_initial_lat_sum[30s]) / rate(ceph_rgw_get_initial_lat_count[30s]) + ) * on (instance_id) group_left (ceph_daemon) 
ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"} + |||, + ||| + sum by (instance_id) ( + rate(ceph_rgw_put_initial_lat_sum[30s]) / rate(ceph_rgw_put_initial_lat_count[30s]) + ) * on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"} + |||, 'GET {{ceph_daemon}}', 'PUT {{ceph_daemon}}', 0, @@ -553,8 +718,14 @@ local u = import 'utils.libsonnet'; '', 'bytes', 'short', - 'rate(ceph_rgw_get_b[30s]) * on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"}', - 'rate(ceph_rgw_put_b[30s]) * on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"}', + ||| + rate(ceph_rgw_get_b[30s]) * + on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"} + |||, + ||| + rate(ceph_rgw_put_b[30s]) * + on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"} + |||, 'GETs {{ceph_daemon}}', 'PUTs {{ceph_daemon}}', 6, @@ -574,8 +745,14 @@ local u = import 'utils.libsonnet'; '', 'short', 'short', - 'rate(ceph_rgw_failed_req[30s]) * on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"}', - 'rate(ceph_rgw_get[30s]) * on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"}', + ||| + rate(ceph_rgw_failed_req[30s]) * + on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"} + |||, + ||| + rate(ceph_rgw_get[30s]) * + on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"} + |||, 'Requests Failed {{ceph_daemon}}', 'GETs {{ceph_daemon}}', 13, @@ -586,11 +763,19 @@ local u = import 'utils.libsonnet'; .addTargets( [ u.addTargetSchema( - 'rate(ceph_rgw_put[30s]) * on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"}', + ||| + rate(ceph_rgw_put[30s]) * + on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"} + |||, 'PUTs {{ceph_daemon}}' ), 
u.addTargetSchema( - '(\n rate(ceph_rgw_req[30s]) -\n (rate(ceph_rgw_get[30s]) + rate(ceph_rgw_put[30s]))\n) * on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"}', + ||| + ( + rate(ceph_rgw_req[30s]) - + (rate(ceph_rgw_get[30s]) + rate(ceph_rgw_put[30s])) + ) * on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"} + |||, 'Other {{ceph_daemon}}' ), ] @@ -605,19 +790,34 @@ local u = import 'utils.libsonnet'; }, '$datasource', '', 'Under graph', 'pie', 'Workload Breakdown', 'current' ) .addTarget(u.addTargetSchema( - 'rate(ceph_rgw_failed_req[30s]) * on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"}', + ||| + rate(ceph_rgw_failed_req[30s]) * + on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"} + |||, 'Failures {{ceph_daemon}}' )) .addTarget(u.addTargetSchema( - 'rate(ceph_rgw_get[30s]) * on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"}', + ||| + rate(ceph_rgw_get[30s]) * + on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"} + |||, 'GETs {{ceph_daemon}}' )) .addTarget(u.addTargetSchema( - 'rate(ceph_rgw_put[30s]) * on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"}', + ||| + rate(ceph_rgw_put[30s]) * + on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"} + |||, 'PUTs {{ceph_daemon}}' )) .addTarget(u.addTargetSchema( - '(\n rate(ceph_rgw_req[30s]) -\n (rate(ceph_rgw_get[30s]) + rate(ceph_rgw_put[30s]))\n) * on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"}', + ||| + ( + rate(ceph_rgw_req[30s]) - + (rate(ceph_rgw_get[30s]) + rate(ceph_rgw_put[30s])) + ) * on (instance_id) group_left (ceph_daemon) + ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"} + |||, 'Other (DELETE,LIST) {{ceph_daemon}}' )) + { gridPos: { x: 20, y: 1, w: 4, h: 8 } }, 
]), diff --git a/monitoring/ceph-mixin/dashboards_out/host-details.json b/monitoring/ceph-mixin/dashboards_out/host-details.json index 7b3c1df152efa..9abc6683ca0f9 100644 --- a/monitoring/ceph-mixin/dashboards_out/host-details.json +++ b/monitoring/ceph-mixin/dashboards_out/host-details.json @@ -192,7 +192,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum by (mode) (\n irate(node_cpu{instance=~\"($ceph_hosts)([\\\\.:].*)?\", mode=~\"(irq|nice|softirq|steal|system|user|iowait)\"}[1m]) or\n irate(node_cpu_seconds_total{instance=~\"($ceph_hosts)([\\\\.:].*)?\", mode=~\"(irq|nice|softirq|steal|system|user|iowait)\"}[1m])\n) / scalar(\n sum(irate(node_cpu{instance=~\"($ceph_hosts)([\\\\.:].*)?\"}[1m]) or\n irate(node_cpu_seconds_total{instance=~\"($ceph_hosts)([\\\\.:].*)?\"}[1m]))\n) * 100", + "expr": "sum by (mode) (\n irate(node_cpu{instance=~\"($ceph_hosts)([\\\\\\\\.:].*)?\", mode=~\"(irq|nice|softirq|steal|system|user|iowait)\"}[1m]) or\n irate(node_cpu_seconds_total{instance=~\"($ceph_hosts)([\\\\\\\\.:].*)?\", mode=~\"(irq|nice|softirq|steal|system|user|iowait)\"}[1m])\n) / (\n scalar(\n sum(irate(node_cpu{instance=~\"($ceph_hosts)([\\\\\\\\.:].*)?\"}[1m]) or\n irate(node_cpu_seconds_total{instance=~\"($ceph_hosts)([\\\\\\\\.:].*)?\"}[1m]))\n ) * 100\n)\n", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{mode}}", @@ -293,28 +293,28 @@ "steppedLine": false, "targets": [ { - "expr": "node_memory_MemFree{instance=~\"$ceph_hosts([\\\\.:].*)?\"} or node_memory_MemFree_bytes{instance=~\"$ceph_hosts([\\\\.:].*)?\"} ", + "expr": "node_memory_MemFree{instance=~\"$ceph_hosts([\\\\\\\\.:].*)?\"} or\n node_memory_MemFree_bytes{instance=~\"$ceph_hosts([\\\\\\\\.:].*)?\"}\n", "format": "time_series", "intervalFactor": 1, "legendFormat": "Free", "refId": "A" }, { - "expr": "node_memory_MemTotal{instance=~\"$ceph_hosts([\\\\.:].*)?\"} or node_memory_MemTotal_bytes{instance=~\"$ceph_hosts([\\\\.:].*)?\"} ", + "expr": 
"node_memory_MemTotal{instance=~\"$ceph_hosts([\\\\\\\\.:].*)?\"} or\n node_memory_MemTotal_bytes{instance=~\"$ceph_hosts([\\\\\\\\.:].*)?\"}\n", "format": "time_series", "intervalFactor": 1, "legendFormat": "total", "refId": "B" }, { - "expr": "(node_memory_Cached{instance=~\"$ceph_hosts([\\\\.:].*)?\"} or node_memory_Cached_bytes{instance=~\"$ceph_hosts([\\\\.:].*)?\"}) + \n(node_memory_Buffers{instance=~\"$ceph_hosts([\\\\.:].*)?\"} or node_memory_Buffers_bytes{instance=~\"$ceph_hosts([\\\\.:].*)?\"}) +\n(node_memory_Slab{instance=~\"$ceph_hosts([\\\\.:].*)?\"} or node_memory_Slab_bytes{instance=~\"$ceph_hosts([\\\\.:].*)?\"}) \n", + "expr": "(\n node_memory_Cached{instance=~\"$ceph_hosts([\\\\\\\\.:].*)?\"} or\n node_memory_Cached_bytes{instance=~\"$ceph_hosts([\\\\\\\\.:].*)?\"}\n) + (\n node_memory_Buffers{instance=~\"$ceph_hosts([\\\\\\\\.:].*)?\"} or\n node_memory_Buffers_bytes{instance=~\"$ceph_hosts([\\\\\\\\.:].*)?\"}\n) + (\n node_memory_Slab{instance=~\"$ceph_hosts([\\\\\\\\.:].*)?\"} or\n node_memory_Slab_bytes{instance=~\"$ceph_hosts([\\\\\\\\.:].*)?\"}\n)\n", "format": "time_series", "intervalFactor": 1, "legendFormat": "buffers/cache", "refId": "C" }, { - "expr": "(node_memory_MemTotal{instance=~\"$ceph_hosts([\\\\.:].*)?\"} or node_memory_MemTotal_bytes{instance=~\"$ceph_hosts([\\\\.:].*)?\"})- (\n (node_memory_MemFree{instance=~\"$ceph_hosts([\\\\.:].*)?\"} or node_memory_MemFree_bytes{instance=~\"$ceph_hosts([\\\\.:].*)?\"}) + \n (node_memory_Cached{instance=~\"$ceph_hosts([\\\\.:].*)?\"} or node_memory_Cached_bytes{instance=~\"$ceph_hosts([\\\\.:].*)?\"}) + \n (node_memory_Buffers{instance=~\"$ceph_hosts([\\\\.:].*)?\"} or node_memory_Buffers_bytes{instance=~\"$ceph_hosts([\\\\.:].*)?\"}) +\n (node_memory_Slab{instance=~\"$ceph_hosts([\\\\.:].*)?\"} or node_memory_Slab_bytes{instance=~\"$ceph_hosts([\\\\.:].*)?\"})\n )\n \n", + "expr": "(\n node_memory_MemTotal{instance=~\"$ceph_hosts([\\\\\\\\.:].*)?\"} or\n 
node_memory_MemTotal_bytes{instance=~\"$ceph_hosts([\\\\\\\\.:].*)?\"}\n) - (\n (\n node_memory_MemFree{instance=~\"$ceph_hosts([\\\\\\\\.:].*)?\"} or\n node_memory_MemFree_bytes{instance=~\"$ceph_hosts([\\\\\\\\.:].*)?\"}\n ) + (\n node_memory_Cached{instance=~\"$ceph_hosts([\\\\\\\\.:].*)?\"} or\n node_memory_Cached_bytes{instance=~\"$ceph_hosts([\\\\\\\\.:].*)?\"}\n ) + (\n node_memory_Buffers{instance=~\"$ceph_hosts([\\\\\\\\.:].*)?\"} or\n node_memory_Buffers_bytes{instance=~\"$ceph_hosts([\\\\\\\\.:].*)?\"}\n ) +\n (\n node_memory_Slab{instance=~\"$ceph_hosts([\\\\\\\\.:].*)?\"} or\n node_memory_Slab_bytes{instance=~\"$ceph_hosts([\\\\\\\\.:].*)?\"}\n )\n)\n", "format": "time_series", "intervalFactor": 1, "legendFormat": "used", @@ -405,14 +405,14 @@ "steppedLine": false, "targets": [ { - "expr": "sum by (device) (\n irate(node_network_receive_bytes{instance=~\"($ceph_hosts)([\\\\.:].*)?\",device!=\"lo\"}[1m]) or \n irate(node_network_receive_bytes_total{instance=~\"($ceph_hosts)([\\\\.:].*)?\",device!=\"lo\"}[1m])\n)", + "expr": "sum by (device) (\n irate(\n node_network_receive_bytes{instance=~\"($ceph_hosts)([\\\\\\\\.:].*)?\",device!=\"lo\"}[1m]) or\n irate(node_network_receive_bytes_total{instance=~\"($ceph_hosts)([\\\\\\\\.:].*)?\",device!=\"lo\"}[1m]\n )\n)\n", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{device}}.rx", "refId": "A" }, { - "expr": "sum by (device) (\n irate(node_network_transmit_bytes{instance=~\"($ceph_hosts)([\\\\.:].*)?\",device!=\"lo\"}[1m]) or\n irate(node_network_transmit_bytes_total{instance=~\"($ceph_hosts)([\\\\.:].*)?\",device!=\"lo\"}[1m])\n)", + "expr": "sum by (device) (\n irate(node_network_transmit_bytes{instance=~\"($ceph_hosts)([\\\\\\\\.:].*)?\",device!=\"lo\"}[1m]) or\n irate(node_network_transmit_bytes_total{instance=~\"($ceph_hosts)([\\\\\\\\.:].*)?\",device!=\"lo\"}[1m])\n)\n", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{device}}.tx", @@ -503,14 +503,14 @@ "steppedLine": 
false, "targets": [ { - "expr": "irate(node_network_receive_drop{instance=~\"$ceph_hosts([\\\\.:].*)?\"}[1m]) or irate(node_network_receive_drop_total{instance=~\"$ceph_hosts([\\\\.:].*)?\"}[1m])", + "expr": "irate(node_network_receive_drop{instance=~\"$ceph_hosts([\\\\\\\\.:].*)?\"}[1m]) or\n irate(node_network_receive_drop_total{instance=~\"$ceph_hosts([\\\\\\\\.:].*)?\"}[1m])\n", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{device}}.rx", "refId": "A" }, { - "expr": "irate(node_network_transmit_drop{instance=~\"$ceph_hosts([\\\\.:].*)?\"}[1m]) or irate(node_network_transmit_drop_total{instance=~\"$ceph_hosts([\\\\.:].*)?\"}[1m])", + "expr": "irate(node_network_transmit_drop{instance=~\"$ceph_hosts([\\\\\\\\.:].*)?\"}[1m]) or\n irate(node_network_transmit_drop_total{instance=~\"$ceph_hosts([\\\\\\\\.:].*)?\"}[1m])\n", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{device}}.tx", @@ -615,7 +615,7 @@ "tableColumn": "", "targets": [ { - "expr": "sum(ceph_osd_stat_bytes and on (ceph_daemon) ceph_disk_occupation{instance=~\"($ceph_hosts)([\\\\.:].*)?\"})", + "expr": "sum(\n ceph_osd_stat_bytes and\n on (ceph_daemon) ceph_disk_occupation{instance=~\"($ceph_hosts)([\\\\\\\\.:].*)?\"}\n)\n", "format": "time_series", "intervalFactor": 1, "legendFormat": "", @@ -683,14 +683,14 @@ "steppedLine": false, "targets": [ { - "expr": "irate(node_network_receive_errs{instance=~\"$ceph_hosts([\\\\.:].*)?\"}[1m]) or irate(node_network_receive_errs_total{instance=~\"$ceph_hosts([\\\\.:].*)?\"}[1m])", + "expr": "irate(node_network_receive_errs{instance=~\"$ceph_hosts([\\\\\\\\.:].*)?\"}[1m]) or\n irate(node_network_receive_errs_total{instance=~\"$ceph_hosts([\\\\\\\\.:].*)?\"}[1m])\n", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{device}}.rx", "refId": "A" }, { - "expr": "irate(node_network_transmit_errs{instance=~\"$ceph_hosts([\\\\.:].*)?\"}[1m]) or 
irate(node_network_transmit_errs_total{instance=~\"$ceph_hosts([\\\\.:].*)?\"}[1m])", + "expr": "irate(node_network_transmit_errs{instance=~\"$ceph_hosts([\\\\\\\\.:].*)?\"}[1m]) or\n irate(node_network_transmit_errs_total{instance=~\"$ceph_hosts([\\\\\\\\.:].*)?\"}[1m])\n", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{device}}.tx", @@ -800,14 +800,14 @@ "steppedLine": false, "targets": [ { - "expr": "label_replace(\n (\n irate(node_disk_writes_completed{instance=~\"($ceph_hosts)([\\\\.:].*)?\"}[5m]) or\n irate(node_disk_writes_completed_total{instance=~\"($ceph_hosts)([\\\\.:].*)?\"}[5m])\n ),\n \"instance\",\n \"$1\",\n \"instance\",\n \"([^:.]*).*\"\n)\n* on(instance, device) group_left(ceph_daemon)\n label_replace(\n label_replace(\n ceph_disk_occupation_human,\n \"device\",\n \"$1\",\n \"device\",\n \"/dev/(.*)\"\n ),\n \"instance\",\n \"$1\",\n \"instance\",\n \"([^:.]*).*\"\n )", + "expr": "label_replace(\n (\n irate(node_disk_writes_completed{instance=~\"($ceph_hosts)([\\\\\\\\.:].*)?\"}[5m]) or\n irate(node_disk_writes_completed_total{instance=~\"($ceph_hosts)([\\\\\\\\.:].*)?\"}[5m])\n ), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n) * on(instance, device) group_left(ceph_daemon) label_replace(\n label_replace(\n ceph_disk_occupation_human, \"device\", \"$1\", \"device\", \"/dev/(.*)\"\n ), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n)\n", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{device}}({{ceph_daemon}}) writes", "refId": "A" }, { - "expr": "label_replace(\n (irate(node_disk_reads_completed{instance=~\"($ceph_hosts)([\\\\.:].*)?\"}[5m]) or irate(node_disk_reads_completed_total{instance=~\"($ceph_hosts)([\\\\.:].*)?\"}[5m])),\n \"instance\",\n \"$1\",\n \"instance\",\n \"([^:.]*).*\"\n)\n* on(instance, device) group_left(ceph_daemon)\n label_replace(\n label_replace(\n ceph_disk_occupation_human,\n \"device\",\n \"$1\",\n \"device\",\n \"/dev/(.*)\"\n ),\n \"instance\",\n \"$1\",\n 
\"instance\",\n \"([^:.]*).*\"\n )", + "expr": "label_replace(\n (\n irate(node_disk_reads_completed{instance=~\"($ceph_hosts)([\\\\\\\\.:].*)?\"}[5m]) or\n irate(node_disk_reads_completed_total{instance=~\"($ceph_hosts)([\\\\\\\\.:].*)?\"}[5m])\n ), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n) * on(instance, device) group_left(ceph_daemon) label_replace(\n label_replace(\n ceph_disk_occupation_human,\"device\", \"$1\", \"device\", \"/dev/(.*)\"\n ), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n)\n", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{device}}({{ceph_daemon}}) reads", @@ -898,14 +898,14 @@ "steppedLine": false, "targets": [ { - "expr": "label_replace((irate(node_disk_bytes_written{instance=~\"($ceph_hosts)([\\\\.:].*)?\"}[5m]) or irate(node_disk_written_bytes_total{instance=~\"($ceph_hosts)([\\\\.:].*)?\"}[5m])), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\") * on(instance, device) group_left(ceph_daemon) label_replace(label_replace(ceph_disk_occupation_human, \"device\", \"$1\", \"device\", \"/dev/(.*)\"), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\")", + "expr": "label_replace(\n (\n irate(node_disk_bytes_written{instance=~\"($ceph_hosts)([\\\\\\\\.:].*)?\"}[5m]) or\n irate(node_disk_written_bytes_total{instance=~\"($ceph_hosts)([\\\\\\\\.:].*)?\"}[5m])\n ), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\") * on(instance, device)\n group_left(ceph_daemon) label_replace(\n label_replace(ceph_disk_occupation_human, \"device\", \"$1\", \"device\", \"/dev/(.*)\"),\n \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n )\n", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{device}}({{ceph_daemon}}) write", "refId": "A" }, { - "expr": "label_replace((irate(node_disk_bytes_read{instance=~\"($ceph_hosts)([\\\\.:].*)?\"}[5m]) or irate(node_disk_read_bytes_total{instance=~\"($ceph_hosts)([\\\\.:].*)?\"}[5m])), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\") * on(instance, device) 
group_left(ceph_daemon) label_replace(label_replace(ceph_disk_occupation_human, \"device\", \"$1\", \"device\", \"/dev/(.*)\"), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\")", + "expr": "label_replace(\n (\n irate(node_disk_bytes_read{instance=~\"($ceph_hosts)([\\\\.:].*)?\"}[5m]) or\n irate(node_disk_read_bytes_total{instance=~\"($ceph_hosts)([\\\\.:].*)?\"}[5m])\n ),\n \"instance\", \"$1\", \"instance\", \"([^:.]*).*\") * on(instance, device)\n group_left(ceph_daemon) label_replace(\n label_replace(ceph_disk_occupation_human, \"device\", \"$1\", \"device\", \"/dev/(.*)\"),\n \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n )\n", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{device}}({{ceph_daemon}}) read", "refId": "B" } ], @@ -991,7 +991,7 @@ "steppedLine": false, "targets": [ { - "expr": "max by(instance,device) (label_replace((irate(node_disk_write_time_seconds_total{ instance=~\"($ceph_hosts)([\\\\.:].*)?\"}[5m]) ) / clamp_min(irate(node_disk_writes_completed_total{ instance=~\"($ceph_hosts)([\\\\.:].*)?\"}[5m]), 0.001) or (irate(node_disk_read_time_seconds_total{ instance=~\"($ceph_hosts)([\\\\.:].*)?\"}[5m]) ) / clamp_min(irate(node_disk_reads_completed_total{ instance=~\"($ceph_hosts)([\\\\.:].*)?\"}[5m]), 0.001), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\")) * on(instance, device) group_left(ceph_daemon) label_replace(label_replace(ceph_disk_occupation_human{instance=~\"($ceph_hosts)([\\\\.:].*)?\"}, \"device\", \"$1\", \"device\", \"/dev/(.*)\"), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\")", + "expr": "max by(instance, device) (label_replace(\n (irate(node_disk_write_time_seconds_total{instance=~\"($ceph_hosts)([\\\\.:].*)?\"}[5m])) /\n clamp_min(irate(node_disk_writes_completed_total{instance=~\"($ceph_hosts)([\\\\.:].*)?\"}[5m]), 0.001) or\n (irate(node_disk_read_time_seconds_total{instance=~\"($ceph_hosts)([\\\\.:].*)?\"}[5m])) /\n 
clamp_min(irate(node_disk_reads_completed_total{instance=~\"($ceph_hosts)([\\\\.:].*)?\"}[5m]), 0.001),\n \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n)) * on(instance, device) group_left(ceph_daemon) label_replace(\n label_replace(\n ceph_disk_occupation_human{instance=~\"($ceph_hosts)([\\\\.:].*)?\"},\n \"device\", \"$1\", \"device\", \"/dev/(.*)\"\n ), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n)\n", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{device}}({{ceph_daemon}})", "refId": "A" } ], @@ -1077,7 +1077,7 @@ "steppedLine": false, "targets": [ { - "expr": "label_replace(((irate(node_disk_io_time_ms{instance=~\"($ceph_hosts)([\\\\.:].*)?\"}[5m]) / 10 ) or irate(node_disk_io_time_seconds_total{instance=~\"($ceph_hosts)([\\\\.:].*)?\"}[5m]) * 100), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\") * on(instance, device) group_left(ceph_daemon) label_replace(label_replace(ceph_disk_occupation_human{instance=~\"($ceph_hosts)([\\\\.:].*)?\"}, \"device\", \"$1\", \"device\", \"/dev/(.*)\"), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\")", + "expr": "label_replace(\n (\n (irate(node_disk_io_time_ms{instance=~\"($ceph_hosts)([\\\\.:].*)?\"}[5m]) / 10) or\n irate(node_disk_io_time_seconds_total{instance=~\"($ceph_hosts)([\\\\.:].*)?\"}[5m]) * 100\n ), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n) * on(instance, device) group_left(ceph_daemon) label_replace(\n label_replace(ceph_disk_occupation_human{instance=~\"($ceph_hosts)([\\\\.:].*)?\"},\n \"device\", \"$1\", \"device\", \"/dev/(.*)\"), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n)\n", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{device}}({{ceph_daemon}})", "refId": "A" } ], diff --git a/monitoring/ceph-mixin/dashboards_out/hosts-overview.json b/monitoring/ceph-mixin/dashboards_out/hosts-overview.json index 3572d7ad4132a..d43696988a90e 100644 --- a/monitoring/ceph-mixin/dashboards_out/hosts-overview.json +++ 
b/monitoring/ceph-mixin/dashboards_out/hosts-overview.json @@ -187,7 +187,7 @@ "tableColumn": "", "targets": [ { - "expr": "avg(\n 1 - (\n avg by(instance) \n (irate(node_cpu_seconds_total{mode='idle',instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"}[1m]) or\n irate(node_cpu{mode='idle',instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"}[1m]))\n )\n )", + "expr": "avg(1 - (\n avg by(instance) (\n irate(node_cpu_seconds_total{mode='idle',instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"}[1m]) or\n irate(node_cpu{mode='idle',instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"}[1m])\n )\n))\n", "format": "time_series", "instant": true, "intervalFactor": 1, @@ -270,7 +270,7 @@ "tableColumn": "", "targets": [ { - "expr": "avg (((node_memory_MemTotal{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"} or node_memory_MemTotal_bytes{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"})- (\n (node_memory_MemFree{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"} or node_memory_MemFree_bytes{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"}) + \n (node_memory_Cached{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"} or node_memory_Cached_bytes{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"}) + \n (node_memory_Buffers{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"} or node_memory_Buffers_bytes{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"}) +\n (node_memory_Slab{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"} or node_memory_Slab_bytes{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"})\n )) /\n (node_memory_MemTotal{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"} or node_memory_MemTotal_bytes{instance=~\"($osd_hosts|$rgw_hosts|$mon_hosts|$mds_hosts).*\"} ))", + "expr": "avg ((\n (\n node_memory_MemTotal{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"} or\n 
node_memory_MemTotal_bytes{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"}\n ) - ((\n node_memory_MemFree{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"} or\n node_memory_MemFree_bytes{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"}) +\n (\n node_memory_Cached{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"} or\n node_memory_Cached_bytes{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"}\n ) + (\n node_memory_Buffers{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"} or\n node_memory_Buffers_bytes{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"}\n ) + (\n node_memory_Slab{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"} or\n node_memory_Slab_bytes{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"}\n )\n )\n) / (\n node_memory_MemTotal{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"} or\n node_memory_MemTotal_bytes{instance=~\"($osd_hosts|$rgw_hosts|$mon_hosts|$mds_hosts).*\"}\n))\n", "format": "time_series", "instant": true, "intervalFactor": 1, @@ -353,7 +353,7 @@ "tableColumn": "", "targets": [ { - "expr": "sum ((irate(node_disk_reads_completed{instance=~\"($osd_hosts).*\"}[5m]) or irate(node_disk_reads_completed_total{instance=~\"($osd_hosts).*\"}[5m]) ) + \n(irate(node_disk_writes_completed{instance=~\"($osd_hosts).*\"}[5m]) or irate(node_disk_writes_completed_total{instance=~\"($osd_hosts).*\"}[5m])))", + "expr": "sum ((\n irate(node_disk_reads_completed{instance=~\"($osd_hosts).*\"}[5m]) or\n irate(node_disk_reads_completed_total{instance=~\"($osd_hosts).*\"}[5m])\n) + (\n irate(node_disk_writes_completed{instance=~\"($osd_hosts).*\"}[5m]) or\n irate(node_disk_writes_completed_total{instance=~\"($osd_hosts).*\"}[5m])\n))\n", "format": "time_series", "instant": true, "intervalFactor": 1, @@ -436,7 +436,7 @@ "tableColumn": "", "targets": [ { - "expr": "avg (\n label_replace((irate(node_disk_io_time_ms[5m]) / 10 ) or\n 
(irate(node_disk_io_time_seconds_total[5m]) * 100), \"instance\", \"$1\", \"instance\", \"([^.:]*).*\"\n ) *\n on(instance, device) group_left(ceph_daemon) label_replace(label_replace(ceph_disk_occupation_human{instance=~\"($osd_hosts).*\"}, \"device\", \"$1\", \"device\", \"/dev/(.*)\"), \"instance\", \"$1\", \"instance\", \"([^.:]*).*\")\n)", + "expr": "avg (\n label_replace(\n (irate(node_disk_io_time_ms[5m]) / 10 ) or\n (irate(node_disk_io_time_seconds_total[5m]) * 100),\n \"instance\", \"$1\", \"instance\", \"([^.:]*).*\"\n ) * on(instance, device) group_left(ceph_daemon) label_replace(\n label_replace(\n ceph_disk_occupation_human{instance=~\"($osd_hosts).*\"},\n \"device\", \"$1\", \"device\", \"/dev/(.*)\"\n ), \"instance\", \"$1\", \"instance\", \"([^.:]*).*\"\n )\n)\n", "format": "time_series", "instant": true, "intervalFactor": 1, @@ -583,7 +583,7 @@ "steppedLine": false, "targets": [ { - "expr": "topk(10,100 * ( 1 - (\n avg by(instance) \n (irate(node_cpu_seconds_total{mode='idle',instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"}[1m]) or\n irate(node_cpu{mode='idle',instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"}[1m]))\n )\n )\n)", + "expr": "topk(10,\n 100 * (\n 1 - (\n avg by(instance) (\n irate(node_cpu_seconds_total{mode='idle',instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"}[1m]) or\n irate(node_cpu{mode='idle',instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"}[1m])\n )\n )\n )\n)\n", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{instance}}", diff --git a/monitoring/ceph-mixin/dashboards_out/osd-device-details.json b/monitoring/ceph-mixin/dashboards_out/osd-device-details.json index 3b45dc967a53a..c82a59dfd05b8 100644 --- a/monitoring/ceph-mixin/dashboards_out/osd-device-details.json +++ b/monitoring/ceph-mixin/dashboards_out/osd-device-details.json @@ -104,14 +104,14 @@ "steppedLine": false, "targets": [ { - "expr": 
"irate(ceph_osd_op_r_latency_sum{ceph_daemon=~\"$osd\"}[1m]) / on (ceph_daemon) irate(ceph_osd_op_r_latency_count[1m])", + "expr": "irate(ceph_osd_op_r_latency_sum{ceph_daemon=~\"$osd\"}[1m]) /\n on (ceph_daemon) irate(ceph_osd_op_r_latency_count[1m])\n", "format": "time_series", "intervalFactor": 1, "legendFormat": "read", "refId": "A" }, { - "expr": "irate(ceph_osd_op_w_latency_sum{ceph_daemon=~\"$osd\"}[1m]) / on (ceph_daemon) irate(ceph_osd_op_w_latency_count[1m])", + "expr": "irate(ceph_osd_op_w_latency_sum{ceph_daemon=~\"$osd\"}[1m]) /\n on (ceph_daemon) irate(ceph_osd_op_w_latency_count[1m])\n", "format": "time_series", "intervalFactor": 1, "legendFormat": "write", @@ -417,14 +417,14 @@ "steppedLine": false, "targets": [ { - "expr": "(label_replace(irate(node_disk_read_time_seconds_total[1m]) / irate(node_disk_reads_completed_total[1m]), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\") and on (instance, device) label_replace(label_replace(ceph_disk_occupation_human{ceph_daemon=~\"$osd\"}, \"device\", \"$1\", \"device\", \"/dev/(.*)\"), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"))", + "expr": "(\n label_replace(\n irate(node_disk_read_time_seconds_total[1m]) / irate(node_disk_reads_completed_total[1m]),\n \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n ) and on (instance, device) label_replace(\n label_replace(\n ceph_disk_occupation_human{ceph_daemon=~\"$osd\"},\n \"device\", \"$1\", \"device\", \"/dev/(.*)\"\n ), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n )\n)\n", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{instance}}/{{device}} Reads", "refId": "A" }, { - "expr": "(label_replace(irate(node_disk_write_time_seconds_total[1m]) / irate(node_disk_writes_completed_total[1m]), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\") and on (instance, device) label_replace(label_replace(ceph_disk_occupation_human{ceph_daemon=~\"$osd\"}, \"device\", \"$1\", \"device\", \"/dev/(.*)\"), \"instance\", \"$1\", \"instance\", 
\"([^:.]*).*\"))", + "expr": "(\n label_replace(\n irate(node_disk_write_time_seconds_total[1m]) / irate(node_disk_writes_completed_total[1m]),\n \"instance\", \"$1\", \"instance\", \"([^:.]*).*\") and on (instance, device)\n label_replace(\n label_replace(\n ceph_disk_occupation_human{ceph_daemon=~\"$osd\"}, \"device\", \"$1\", \"device\", \"/dev/(.*)\"\n ), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n )\n )\n", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{instance}}/{{device}} Writes", @@ -515,14 +515,14 @@ "steppedLine": false, "targets": [ { - "expr": "label_replace(irate(node_disk_writes_completed_total[1m]), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\") and on (instance, device) label_replace(label_replace(ceph_disk_occupation_human{ceph_daemon=~\"$osd\"}, \"device\", \"$1\", \"device\", \"/dev/(.*)\"), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\")", + "expr": "label_replace(\n irate(node_disk_writes_completed_total[1m]),\n \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n) and on (instance, device) label_replace(\n label_replace(\n ceph_disk_occupation_human{ceph_daemon=~\"$osd\"},\n \"device\", \"$1\", \"device\", \"/dev/(.*)\"\n ), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n)\n", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{device}} on {{instance}} Writes", "refId": "A" }, { - "expr": "label_replace(irate(node_disk_reads_completed_total[1m]), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\") and on (instance, device) label_replace(label_replace(ceph_disk_occupation_human{ceph_daemon=~\"$osd\"}, \"device\", \"$1\", \"device\", \"/dev/(.*)\"), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\")", + "expr": "label_replace(\n irate(node_disk_reads_completed_total[1m]),\n \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n) and on (instance, device) label_replace(\n label_replace(\n ceph_disk_occupation_human{ceph_daemon=~\"$osd\"},\n \"device\", \"$1\", \"device\", \"/dev/(.*)\"\n ), 
\"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n)\n", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{device}} on {{instance}} Reads", @@ -613,14 +613,14 @@ "steppedLine": false, "targets": [ { - "expr": "label_replace(irate(node_disk_read_bytes_total[1m]), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\") and on (instance, device) label_replace(label_replace(ceph_disk_occupation_human{ceph_daemon=~\"$osd\"}, \"device\", \"$1\", \"device\", \"/dev/(.*)\"), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\")", + "expr": "label_replace(\n irate(node_disk_read_bytes_total[1m]), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n) and on (instance, device) label_replace(\n label_replace(\n ceph_disk_occupation_human{ceph_daemon=~\"$osd\"},\n \"device\", \"$1\", \"device\", \"/dev/(.*)\"\n ), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n)\n", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{instance}} {{device}} Reads", "refId": "A" }, { - "expr": "label_replace(irate(node_disk_written_bytes_total[1m]), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\") and on (instance, device) label_replace(label_replace(ceph_disk_occupation_human{ceph_daemon=~\"$osd\"}, \"device\", \"$1\", \"device\", \"/dev/(.*)\"), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\")", + "expr": "label_replace(\n irate(node_disk_written_bytes_total[1m]), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n) and on (instance, device) label_replace(\n label_replace(\n ceph_disk_occupation_human{ceph_daemon=~\"$osd\"},\n \"device\", \"$1\", \"device\", \"/dev/(.*)\"\n ), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n)\n", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{instance}} {{device}} Writes", @@ -706,7 +706,7 @@ "steppedLine": false, "targets": [ { - "expr": "label_replace(irate(node_disk_io_time_seconds_total[1m]), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\") and on (instance, device) 
label_replace(label_replace(ceph_disk_occupation_human{ceph_daemon=~\"$osd\"}, \"device\", \"$1\", \"device\", \"/dev/(.*)\"), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\")", + "expr": "label_replace(\n irate(node_disk_io_time_seconds_total[1m]),\n \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n) and on (instance, device) label_replace(\n label_replace(\n ceph_disk_occupation_human{ceph_daemon=~\"$osd\"}, \"device\", \"$1\", \"device\", \"/dev/(.*)\"\n ), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n)\n", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{device}} on {{instance}}", diff --git a/monitoring/ceph-mixin/dashboards_out/osds-overview.json b/monitoring/ceph-mixin/dashboards_out/osds-overview.json index ffcf0601563ee..54db59b45022d 100644 --- a/monitoring/ceph-mixin/dashboards_out/osds-overview.json +++ b/monitoring/ceph-mixin/dashboards_out/osds-overview.json @@ -101,14 +101,14 @@ "refId": "A" }, { - "expr": "max (irate(ceph_osd_op_r_latency_sum[1m]) / on (ceph_daemon) irate(ceph_osd_op_r_latency_count[1m]) * 1000)", + "expr": "max(\n irate(ceph_osd_op_r_latency_sum[1m]) /\n on (ceph_daemon) irate(ceph_osd_op_r_latency_count[1m]) * 1000\n)\n", "format": "time_series", "intervalFactor": 1, "legendFormat": "MAX read", "refId": "B" }, { - "expr": "quantile(0.95,\n (irate(ceph_osd_op_r_latency_sum[1m]) / on (ceph_daemon) irate(ceph_osd_op_r_latency_count[1m]) * 1000)\n)", + "expr": "quantile(0.95,\n (\n irate(ceph_osd_op_r_latency_sum[1m]) /\n on (ceph_daemon) irate(ceph_osd_op_r_latency_count[1m])\n * 1000\n )\n)\n", "format": "time_series", "intervalFactor": 1, "legendFormat": "@95%ile", @@ -222,7 +222,7 @@ ], "targets": [ { - "expr": "topk(10,\n (sort(\n (irate(ceph_osd_op_r_latency_sum[1m]) / on (ceph_daemon) irate(ceph_osd_op_r_latency_count[1m]) * 1000)\n ))\n)\n\n", + "expr": "topk(10,\n (sort(\n (\n irate(ceph_osd_op_r_latency_sum[1m]) /\n on (ceph_daemon) irate(ceph_osd_op_r_latency_count[1m]) *\n 1000\n )\n ))\n)\n", 
"format": "table", "instant": true, "intervalFactor": 1, @@ -281,21 +281,21 @@ "steppedLine": false, "targets": [ { - "expr": "avg (irate(ceph_osd_op_w_latency_sum[1m]) / on (ceph_daemon) irate(ceph_osd_op_w_latency_count[1m]) * 1000)", + "expr": "avg(\n irate(ceph_osd_op_w_latency_sum[1m]) /\n on (ceph_daemon) irate(ceph_osd_op_w_latency_count[1m])\n * 1000\n)\n", "format": "time_series", "intervalFactor": 1, "legendFormat": "AVG write", "refId": "A" }, { - "expr": "max (irate(ceph_osd_op_w_latency_sum[1m]) / on (ceph_daemon) irate(ceph_osd_op_w_latency_count[1m]) * 1000)", + "expr": "max(\n irate(ceph_osd_op_w_latency_sum[1m]) /\n on (ceph_daemon) irate(ceph_osd_op_w_latency_count[1m]) *\n 1000\n)\n", "format": "time_series", "intervalFactor": 1, "legendFormat": "MAX write", "refId": "B" }, { - "expr": "quantile(0.95,\n (irate(ceph_osd_op_w_latency_sum[1m]) / on (ceph_daemon) irate(ceph_osd_op_w_latency_count[1m]) * 1000)\n)", + "expr": "quantile(0.95, (\n irate(ceph_osd_op_w_latency_sum[1m]) /\n on (ceph_daemon) irate(ceph_osd_op_w_latency_count[1m]) *\n 1000\n))\n", "format": "time_series", "intervalFactor": 1, "legendFormat": "@95%ile write", @@ -409,7 +409,7 @@ ], "targets": [ { - "expr": "topk(10,\n (sort(\n (irate(ceph_osd_op_w_latency_sum[1m]) / on (ceph_daemon) irate(ceph_osd_op_w_latency_count[1m]) * 1000)\n ))\n)\n\n", + "expr": "topk(10,\n (sort(\n (irate(ceph_osd_op_w_latency_sum[1m]) /\n on (ceph_daemon) irate(ceph_osd_op_w_latency_count[1m]) *\n 1000)\n ))\n)\n", "format": "table", "instant": true, "intervalFactor": 1, @@ -623,7 +623,7 @@ "steppedLine": false, "targets": [ { - "expr": "ceph_osd_numpg\n", + "expr": "ceph_osd_numpg", "format": "time_series", "instant": true, "intervalFactor": 1, @@ -728,7 +728,7 @@ "tableColumn": "", "targets": [ { - "expr": "sum(ceph_bluestore_onode_hits)/(sum(ceph_bluestore_onode_hits) + sum(ceph_bluestore_onode_misses))", + "expr": "sum(ceph_bluestore_onode_hits) / (\n sum(ceph_bluestore_onode_hits) +\n 
sum(ceph_bluestore_onode_misses)\n)\n", "format": "time_series", "intervalFactor": 1, "legendFormat": "", diff --git a/monitoring/ceph-mixin/dashboards_out/pool-detail.json b/monitoring/ceph-mixin/dashboards_out/pool-detail.json index 9a8518e151c61..26ce7bf41cdc0 100644 --- a/monitoring/ceph-mixin/dashboards_out/pool-detail.json +++ b/monitoring/ceph-mixin/dashboards_out/pool-detail.json @@ -104,7 +104,7 @@ "tableColumn": "", "targets": [ { - "expr": "(ceph_pool_stored / (ceph_pool_stored + ceph_pool_max_avail)) * on(pool_id) group_left(instance,name) ceph_pool_metadata{name=~\"$pool_name\"}", + "expr": "(ceph_pool_stored / (ceph_pool_stored + ceph_pool_max_avail)) *\n on(pool_id) group_left(instance,name) ceph_pool_metadata{name=~\"$pool_name\"}\n", "format": "time_series", "intervalFactor": 1, "legendFormat": "", @@ -186,7 +186,7 @@ "tableColumn": "", "targets": [ { - "expr": "(ceph_pool_max_avail / deriv(ceph_pool_stored[6h])) * on(pool_id) group_left(instance,name) ceph_pool_metadata{name=~\"$pool_name\"} > 0", + "expr": "(ceph_pool_max_avail / deriv(ceph_pool_stored[6h])) *\n on(pool_id) group_left(instance,name) ceph_pool_metadata{name=~\"$pool_name\"} > 0\n", "format": "time_series", "intervalFactor": 1, "legendFormat": "", @@ -252,7 +252,7 @@ "steppedLine": false, "targets": [ { - "expr": "deriv(ceph_pool_objects[1m]) * on(pool_id) group_left(instance,name) ceph_pool_metadata{name=~\"$pool_name\"}", + "expr": "deriv(ceph_pool_objects[1m]) *\n on(pool_id) group_left(instance,name) ceph_pool_metadata{name=~\"$pool_name\"}\n", "format": "time_series", "intervalFactor": 1, "legendFormat": "Objects per second", @@ -346,14 +346,14 @@ "steppedLine": false, "targets": [ { - "expr": "irate(ceph_pool_rd[1m]) * on(pool_id) group_left(instance,name) ceph_pool_metadata{name=~\"$pool_name\"}", + "expr": "irate(ceph_pool_rd[1m]) *\n on(pool_id) group_left(instance,name) ceph_pool_metadata{name=~\"$pool_name\"}\n", "format": "time_series", "intervalFactor": 1, 
"legendFormat": "reads", "refId": "A" }, { - "expr": "irate(ceph_pool_wr[1m]) * on(pool_id) group_left(instance,name) ceph_pool_metadata{name=~\"$pool_name\"}", + "expr": "irate(ceph_pool_wr[1m]) *\n on(pool_id) group_left(instance,name) ceph_pool_metadata{name=~\"$pool_name\"}\n", "format": "time_series", "intervalFactor": 1, "legendFormat": "writes", @@ -447,14 +447,14 @@ "steppedLine": false, "targets": [ { - "expr": "irate(ceph_pool_rd_bytes[1m]) + on(pool_id) group_left(instance,name) ceph_pool_metadata{name=~\"$pool_name\"}", + "expr": "irate(ceph_pool_rd_bytes[1m]) +\n on(pool_id) group_left(instance, name) ceph_pool_metadata{name=~\"$pool_name\"}\n", "format": "time_series", "intervalFactor": 1, "legendFormat": "reads", "refId": "A" }, { - "expr": "irate(ceph_pool_wr_bytes[1m]) + on(pool_id) group_left(instance,name) ceph_pool_metadata{name=~\"$pool_name\"}", + "expr": "irate(ceph_pool_wr_bytes[1m]) +\n on(pool_id) group_left(instance,name) ceph_pool_metadata{name=~\"$pool_name\"}\n", "format": "time_series", "intervalFactor": 1, "legendFormat": "writes", @@ -543,7 +543,7 @@ "steppedLine": false, "targets": [ { - "expr": "ceph_pool_objects * on(pool_id) group_left(instance,name) ceph_pool_metadata{name=~\"$pool_name\"}", + "expr": "ceph_pool_objects *\n on(pool_id) group_left(instance,name) ceph_pool_metadata{name=~\"$pool_name\"}\n", "format": "time_series", "intervalFactor": 1, "legendFormat": "Number of Objects", diff --git a/monitoring/ceph-mixin/dashboards_out/pool-overview.json b/monitoring/ceph-mixin/dashboards_out/pool-overview.json index 5767d40eb1a1e..ecd77749d0571 100644 --- a/monitoring/ceph-mixin/dashboards_out/pool-overview.json +++ b/monitoring/ceph-mixin/dashboards_out/pool-overview.json @@ -528,7 +528,7 @@ "#d44a3a" ], "datasource": "$datasource", - "description": "Indicates how suitable the data is within the pools that are/have been enabled for compression - averaged across all pools holding compressed data\n", + "description": "Indicates 
how suitable the data is within the pools that are/have been enabled for compression - averaged across all pools holding compressed data", "format": "percent", "gauge": { "maxValue": 100, @@ -580,7 +580,7 @@ "tableColumn": "", "targets": [ { - "expr": "(sum(ceph_pool_compress_under_bytes > 0) / sum(ceph_pool_stored_raw and ceph_pool_compress_under_bytes > 0)) * 100", + "expr": "(\n sum(ceph_pool_compress_under_bytes > 0) /\n sum(ceph_pool_stored_raw and ceph_pool_compress_under_bytes > 0)\n) * 100\n", "format": "table", "intervalFactor": 1, "legendFormat": "", @@ -1053,7 +1053,7 @@ ], "targets": [ { - "expr": "(ceph_pool_compress_under_bytes / ceph_pool_compress_bytes_used > 0) and on(pool_id) (((ceph_pool_compress_under_bytes > 0) / ceph_pool_stored_raw) * 100 > 0.5)", + "expr": "(ceph_pool_compress_under_bytes / ceph_pool_compress_bytes_used > 0) and on(pool_id) (\n ((ceph_pool_compress_under_bytes > 0) / ceph_pool_stored_raw) * 100 > 0.5\n)\n", "format": "table", "instant": true, "intervalFactor": 1, @@ -1197,14 +1197,14 @@ "steppedLine": false, "targets": [ { - "expr": "topk($topk,round((rate(ceph_pool_rd[30s]) + rate(ceph_pool_wr[30s])),1) * on(pool_id) group_left(instance,name) ceph_pool_metadata) ", + "expr": "topk($topk,\n round(\n (rate(ceph_pool_rd[30s]) + rate(ceph_pool_wr[30s])),\n 1\n ) * on(pool_id) group_left(instance,name) ceph_pool_metadata)\n", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{name}} ", "refId": "A" }, { - "expr": "topk($topk,rate(ceph_pool_wr[30s]) + on(pool_id) group_left(instance,name) ceph_pool_metadata) ", + "expr": "topk($topk,\n rate(ceph_pool_wr[30s]) +\n on(pool_id) group_left(instance,name) ceph_pool_metadata\n)\n", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{name}} - write", @@ -1290,7 +1290,7 @@ "steppedLine": false, "targets": [ { - "expr": "topk($topk,(rate(ceph_pool_rd_bytes[30s]) + rate(ceph_pool_wr_bytes[30s])) * on(pool_id) group_left(instance,name) ceph_pool_metadata)", + 
"expr": "topk($topk,\n (rate(ceph_pool_rd_bytes[30s]) + rate(ceph_pool_wr_bytes[30s])) *\n on(pool_id) group_left(instance, name) ceph_pool_metadata\n)\n", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{name}}", diff --git a/monitoring/ceph-mixin/dashboards_out/radosgw-detail.json b/monitoring/ceph-mixin/dashboards_out/radosgw-detail.json index 4d68906f2ba07..2bd8ac4055fa0 100644 --- a/monitoring/ceph-mixin/dashboards_out/radosgw-detail.json +++ b/monitoring/ceph-mixin/dashboards_out/radosgw-detail.json @@ -105,14 +105,14 @@ "steppedLine": false, "targets": [ { - "expr": "sum by (instance_id) (rate(ceph_rgw_get_initial_lat_sum[30s]) / rate(ceph_rgw_get_initial_lat_count[30s])) * on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"}", + "expr": "sum by (instance_id) (\n rate(ceph_rgw_get_initial_lat_sum[30s]) / rate(ceph_rgw_get_initial_lat_count[30s])\n) * on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"}\n", "format": "time_series", "intervalFactor": 1, "legendFormat": "GET {{ceph_daemon}}", "refId": "A" }, { - "expr": "sum by (instance_id) (rate(ceph_rgw_put_initial_lat_sum[30s]) / rate(ceph_rgw_put_initial_lat_count[30s])) * on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"}", + "expr": "sum by (instance_id) (\n rate(ceph_rgw_put_initial_lat_sum[30s]) / rate(ceph_rgw_put_initial_lat_count[30s])\n) * on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"}\n", "format": "time_series", "intervalFactor": 1, "legendFormat": "PUT {{ceph_daemon}}", @@ -198,14 +198,14 @@ "steppedLine": false, "targets": [ { - "expr": "rate(ceph_rgw_get_b[30s]) * on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"}", + "expr": "rate(ceph_rgw_get_b[30s]) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"}\n", "format": "time_series", 
"intervalFactor": 1, "legendFormat": "GETs {{ceph_daemon}}", "refId": "A" }, { - "expr": "rate(ceph_rgw_put_b[30s]) * on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"}", + "expr": "rate(ceph_rgw_put_b[30s]) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"}\n", "format": "time_series", "intervalFactor": 1, "legendFormat": "PUTs {{ceph_daemon}}", @@ -297,28 +297,28 @@ "steppedLine": false, "targets": [ { - "expr": "rate(ceph_rgw_failed_req[30s]) * on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"}", + "expr": "rate(ceph_rgw_failed_req[30s]) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"}\n", "format": "time_series", "intervalFactor": 1, "legendFormat": "Requests Failed {{ceph_daemon}}", "refId": "A" }, { - "expr": "rate(ceph_rgw_get[30s]) * on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"}", + "expr": "rate(ceph_rgw_get[30s]) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"}\n", "format": "time_series", "intervalFactor": 1, "legendFormat": "GETs {{ceph_daemon}}", "refId": "B" }, { - "expr": "rate(ceph_rgw_put[30s]) * on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"}", + "expr": "rate(ceph_rgw_put[30s]) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"}\n", "format": "time_series", "intervalFactor": 1, "legendFormat": "PUTs {{ceph_daemon}}", "refId": "C" }, { - "expr": "(\n rate(ceph_rgw_req[30s]) -\n (rate(ceph_rgw_get[30s]) + rate(ceph_rgw_put[30s]))\n) * on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"}", + "expr": "(\n rate(ceph_rgw_req[30s]) -\n (rate(ceph_rgw_get[30s]) + rate(ceph_rgw_put[30s]))\n) * on (instance_id) group_left (ceph_daemon) 
ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"}\n", "format": "time_series", "intervalFactor": 1, "legendFormat": "Other {{ceph_daemon}}", @@ -387,28 +387,28 @@ "pieType": "pie", "targets": [ { - "expr": "rate(ceph_rgw_failed_req[30s]) * on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"}", + "expr": "rate(ceph_rgw_failed_req[30s]) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"}\n", "format": "time_series", "intervalFactor": 1, "legendFormat": "Failures {{ceph_daemon}}", "refId": "A" }, { - "expr": "rate(ceph_rgw_get[30s]) * on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"}", + "expr": "rate(ceph_rgw_get[30s]) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"}\n", "format": "time_series", "intervalFactor": 1, "legendFormat": "GETs {{ceph_daemon}}", "refId": "B" }, { - "expr": "rate(ceph_rgw_put[30s]) * on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"}", + "expr": "rate(ceph_rgw_put[30s]) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"}\n", "format": "time_series", "intervalFactor": 1, "legendFormat": "PUTs {{ceph_daemon}}", "refId": "C" }, { - "expr": "(\n rate(ceph_rgw_req[30s]) -\n (rate(ceph_rgw_get[30s]) + rate(ceph_rgw_put[30s]))\n) * on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"}", + "expr": "(\n rate(ceph_rgw_req[30s]) -\n (rate(ceph_rgw_get[30s]) + rate(ceph_rgw_put[30s]))\n) * on (instance_id) group_left (ceph_daemon)\n ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"}\n", "format": "time_series", "intervalFactor": 1, "legendFormat": "Other (DELETE,LIST) {{ceph_daemon}}", diff --git a/monitoring/ceph-mixin/dashboards_out/radosgw-overview.json b/monitoring/ceph-mixin/dashboards_out/radosgw-overview.json index 7f9375290fc0c..1c93adc4697dd 100644 --- 
a/monitoring/ceph-mixin/dashboards_out/radosgw-overview.json +++ b/monitoring/ceph-mixin/dashboards_out/radosgw-overview.json @@ -99,14 +99,14 @@ "steppedLine": false, "targets": [ { - "expr": "rate(ceph_rgw_get_initial_lat_sum[30s]) / rate(ceph_rgw_get_initial_lat_count[30s]) * on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata", + "expr": "rate(ceph_rgw_get_initial_lat_sum[30s]) /\n rate(ceph_rgw_get_initial_lat_count[30s]) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata\n", "format": "time_series", "intervalFactor": 1, "legendFormat": "GET AVG", "refId": "A" }, { - "expr": "rate(ceph_rgw_put_initial_lat_sum[30s]) / rate(ceph_rgw_put_initial_lat_count[30s]) * on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata", + "expr": "rate(ceph_rgw_put_initial_lat_sum[30s]) /\n rate(ceph_rgw_put_initial_lat_count[30s]) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata\n", "format": "time_series", "intervalFactor": 1, "legendFormat": "PUT AVG", @@ -192,7 +192,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum by (rgw_host) (label_replace(rate(ceph_rgw_req[30s]) * on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata, \"rgw_host\", \"$1\", \"ceph_daemon\", \"rgw.(.*)\"))", + "expr": "sum by (rgw_host) (\n label_replace(\n rate(ceph_rgw_req[30s]) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata,\n \"rgw_host\", \"$1\", \"ceph_daemon\", \"rgw.(.*)\"\n )\n)\n", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{rgw_host}}", @@ -278,7 +278,7 @@ "steppedLine": false, "targets": [ { - "expr": "label_replace(\n rate(ceph_rgw_get_initial_lat_sum[30s]) /\n rate(ceph_rgw_get_initial_lat_count[30s]) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata,\n\"rgw_host\", \"$1\", \"ceph_daemon\", \"rgw.(.*)\")", + "expr": "label_replace(\n rate(ceph_rgw_get_initial_lat_sum[30s]) /\n rate(ceph_rgw_get_initial_lat_count[30s]) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata,\n 
\"rgw_host\", \"$1\", \"ceph_daemon\", \"rgw.(.*)\"\n)\n", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{rgw_host}}", @@ -457,7 +457,7 @@ "steppedLine": false, "targets": [ { - "expr": "label_replace(sum by (instance_id) (\n rate(ceph_rgw_get_b[30s]) + \n rate(ceph_rgw_put_b[30s])\n) * on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata, \"rgw_host\", \"$1\", \"ceph_daemon\", \"rgw.(.*)\")", + "expr": "label_replace(sum by (instance_id) (\n rate(ceph_rgw_get_b[30s]) +\n rate(ceph_rgw_put_b[30s])) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata,\n \"rgw_host\", \"$1\", \"ceph_daemon\", \"rgw.(.*)\"\n)\n", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{rgw_host}}", @@ -543,7 +543,7 @@ "steppedLine": false, "targets": [ { - "expr": "label_replace(\n rate(ceph_rgw_put_initial_lat_sum[30s]) /\n rate(ceph_rgw_put_initial_lat_count[30s]) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata,\n\"rgw_host\", \"$1\", \"ceph_daemon\", \"rgw.(.*)\")", + "expr": "label_replace(\n rate(ceph_rgw_put_initial_lat_sum[30s]) /\n rate(ceph_rgw_put_initial_lat_count[30s]) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata,\n \"rgw_host\", \"$1\", \"ceph_daemon\", \"rgw.(.*)\"\n)\n", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{rgw_host}}", @@ -673,14 +673,14 @@ "steppedLine": false, "targets": [ { - "expr": "sum(irate(haproxy_frontend_http_responses_total{code=~\"$code\",instance=~\"$ingress_service\",proxy=~\"frontend\"}[5m])) by (code)", + "expr": "sum(\n irate(\n haproxy_frontend_http_responses_total{code=~\"$code\", instance=~\"$ingress_service\", proxy=~\"frontend\"}[5m]\n )\n) by (code)\n", "format": "time_series", "intervalFactor": 1, "legendFormat": "Frontend {{ code }}", "refId": "A" }, { - "expr": "sum(irate(haproxy_backend_http_responses_total{code=~\"$code\",instance=~\"$ingress_service\",proxy=~\"backend\"}[5m])) by (code)", + "expr": "sum(\n irate(\n 
haproxy_backend_http_responses_total{code=~\"$code\", instance=~\"$ingress_service\", proxy=~\"backend\"}[5m]\n )\n) by (code)\n", "format": "time_series", "intervalFactor": 1, "legendFormat": "Backend {{ code }}", @@ -777,49 +777,49 @@ "steppedLine": false, "targets": [ { - "expr": "sum(irate(haproxy_frontend_http_requests_total{proxy=~\"frontend\",instance=~\"$ingress_service\"}[5m])) by (instance)", + "expr": "sum(\n irate(\n haproxy_frontend_http_requests_total{proxy=~\"frontend\", instance=~\"$ingress_service\"}[5m]\n )\n) by (instance)\n", "format": "time_series", "intervalFactor": 1, "legendFormat": "Requests", "refId": "A" }, { - "expr": "sum(irate(haproxy_backend_response_errors_total{proxy=~\"backend\",instance=~\"$ingress_service\"}[5m])) by (instance)", + "expr": "sum(\n irate(\n haproxy_backend_response_errors_total{proxy=~\"backend\",instance=~\"$ingress_service\"}[5m]\n )\n) by (instance)\n", "format": "time_series", "intervalFactor": 2, "legendFormat": "Response errors", "refId": "B" }, { - "expr": "sum(irate(haproxy_frontend_request_errors_total{proxy=~\"frontend\",instance=~\"$ingress_service\"}[5m])) by (instance)", + "expr": "sum(\n irate(\n haproxy_frontend_request_errors_total{proxy=~\"frontend\", instance=~\"$ingress_service\"}[5m]\n )\n) by (instance)\n", "format": "time_series", "intervalFactor": 1, "legendFormat": "Requests errors", "refId": "C" }, { - "expr": "sum(irate(haproxy_backend_redispatch_warnings_total{proxy=~\"backend\",instance=~\"$ingress_service\"}[5m])) by (instance)", + "expr": "sum(\n irate(\n haproxy_backend_redispatch_warnings_total{proxy=~\"backend\", instance=~\"$ingress_service\"}[5m]\n )\n) by (instance)\n", "format": "time_series", "intervalFactor": 2, "legendFormat": "Backend redispatch", "refId": "D" }, { - "expr": "sum(irate(haproxy_backend_retry_warnings_total{proxy=~\"backend\",instance=~\"$ingress_service\"}[5m])) by (instance)", + "expr": "sum(\n irate(\n 
haproxy_backend_retry_warnings_total{proxy=~\"backend\", instance=~\"$ingress_service\"}[5m]\n )\n) by (instance)\n", "format": "time_series", "intervalFactor": 2, "legendFormat": "Backend retry", "refId": "E" }, { - "expr": "sum(irate(haproxy_frontend_requests_denied_total{proxy=~\"frontend\",instance=~\"$ingress_service\"}[5m])) by (instance)", + "expr": "sum(\n irate(\n haproxy_frontend_requests_denied_total{proxy=~\"frontend\", instance=~\"$ingress_service\"}[5m]\n )\n) by (instance)\n", "format": "time_series", "intervalFactor": 2, "legendFormat": "Request denied", "refId": "F" }, { - "expr": "sum(haproxy_backend_current_queue{proxy=~\"backend\",instance=~\"$ingress_service\"}) by (instance)", + "expr": "sum(\n haproxy_backend_current_queue{proxy=~\"backend\", instance=~\"$ingress_service\"}\n) by (instance)\n", "format": "time_series", "intervalFactor": 2, "legendFormat": "Backend Queued", @@ -912,21 +912,21 @@ "steppedLine": false, "targets": [ { - "expr": "sum(irate(haproxy_frontend_connections_total{proxy=~\"frontend\",instance=~\"$ingress_service\"}[5m])) by (instance)", + "expr": "sum(\n irate(\n haproxy_frontend_connections_total{proxy=~\"frontend\", instance=~\"$ingress_service\"}[5m]\n )\n) by (instance)\n", "format": "time_series", "intervalFactor": 1, "legendFormat": "Front", "refId": "A" }, { - "expr": "sum(irate(haproxy_backend_connection_attempts_total{proxy=~\"backend\",instance=~\"$ingress_service\"}[5m])) by (instance)", + "expr": "sum(\n irate(\n haproxy_backend_connection_attempts_total{proxy=~\"backend\", instance=~\"$ingress_service\"}[5m]\n )\n) by (instance)\n", "format": "time_series", "intervalFactor": 1, "legendFormat": "Back", "refId": "B" }, { - "expr": "sum(irate(haproxy_backend_connection_errors_total{proxy=~\"backend\",instance=~\"$ingress_service\"}[5m])) by (instance)", + "expr": "sum(\n irate(\n haproxy_backend_connection_errors_total{proxy=~\"backend\", instance=~\"$ingress_service\"}[5m]\n )\n) by (instance)\n", "format": 
"time_series", "intervalFactor": 1, "legendFormat": "Back errors", @@ -1019,28 +1019,28 @@ "steppedLine": false, "targets": [ { - "expr": "sum(irate(haproxy_frontend_bytes_in_total{proxy=~\"frontend\",instance=~\"$ingress_service\"}[5m])*8) by (instance)", + "expr": "sum(\n irate(\n haproxy_frontend_bytes_in_total{proxy=~\"frontend\", instance=~\"$ingress_service\"}[5m]\n ) * 8\n) by (instance)\n", "format": "time_series", "intervalFactor": 1, "legendFormat": "IN Front", "refId": "A" }, { - "expr": "sum(irate(haproxy_frontend_bytes_out_total{proxy=~\"frontend\",instance=~\"$ingress_service\"}[5m])*8) by (instance)", + "expr": "sum(\n irate(\n haproxy_frontend_bytes_out_total{proxy=~\"frontend\", instance=~\"$ingress_service\"}[5m]\n ) * 8\n) by (instance)\n", "format": "time_series", "intervalFactor": 2, "legendFormat": "OUT Front", "refId": "B" }, { - "expr": "sum(irate(haproxy_backend_bytes_in_total{proxy=~\"backend\",instance=~\"$ingress_service\"}[5m])*8) by (instance)", + "expr": "sum(\n irate(\n haproxy_backend_bytes_in_total{proxy=~\"backend\", instance=~\"$ingress_service\"}[5m]\n ) * 8\n) by (instance)\n", "format": "time_series", "intervalFactor": 2, "legendFormat": "IN Back", "refId": "C" }, { - "expr": "sum(irate(haproxy_backend_bytes_out_total{proxy=~\"backend\",instance=~\"$ingress_service\"}[5m])*8) by (instance)", + "expr": "sum(\n irate(\n haproxy_backend_bytes_out_total{proxy=~\"backend\", instance=~\"$ingress_service\"}[5m]\n ) * 8\n) by (instance)\n", "format": "time_series", "intervalFactor": 2, "legendFormat": "OUT Back", diff --git a/monitoring/ceph-mixin/dashboards_out/rbd-details.json b/monitoring/ceph-mixin/dashboards_out/rbd-details.json index 7a9e1b56b8fc9..1f03187260003 100644 --- a/monitoring/ceph-mixin/dashboards_out/rbd-details.json +++ b/monitoring/ceph-mixin/dashboards_out/rbd-details.json @@ -266,14 +266,14 @@ "steppedLine": false, "targets": [ { - "expr": "irate(ceph_rbd_write_latency_sum{pool=\"$Pool\", image=\"$Image\"}[30s]) / 
irate(ceph_rbd_write_latency_count{pool=\"$Pool\", image=\"$Image\"}[30s])", + "expr": "irate(ceph_rbd_write_latency_sum{pool=\"$Pool\", image=\"$Image\"}[30s]) /\n irate(ceph_rbd_write_latency_count{pool=\"$Pool\", image=\"$Image\"}[30s])\n", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{pool}} Write", "refId": "A" }, { - "expr": "irate(ceph_rbd_read_latency_sum{pool=\"$Pool\", image=\"$Image\"}[30s]) / irate(ceph_rbd_read_latency_count{pool=\"$Pool\", image=\"$Image\"}[30s])", + "expr": "irate(ceph_rbd_read_latency_sum{pool=\"$Pool\", image=\"$Image\"}[30s]) /\n irate(ceph_rbd_read_latency_count{pool=\"$Pool\", image=\"$Image\"}[30s])\n", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{pool}} Read", diff --git a/monitoring/ceph-mixin/dashboards_out/rbd-overview.json b/monitoring/ceph-mixin/dashboards_out/rbd-overview.json index 29b82afa523b0..0505437648772 100644 --- a/monitoring/ceph-mixin/dashboards_out/rbd-overview.json +++ b/monitoring/ceph-mixin/dashboards_out/rbd-overview.json @@ -278,14 +278,14 @@ "steppedLine": false, "targets": [ { - "expr": "round(sum(irate(ceph_rbd_write_latency_sum[30s])) / sum(irate(ceph_rbd_write_latency_count[30s])))", + "expr": "round(\n sum(irate(ceph_rbd_write_latency_sum[30s])) /\n sum(irate(ceph_rbd_write_latency_count[30s]))\n)\n", "format": "time_series", "intervalFactor": 1, "legendFormat": "Write", "refId": "A" }, { - "expr": "round(sum(irate(ceph_rbd_read_latency_sum[30s])) / sum(irate(ceph_rbd_read_latency_count[30s])))", + "expr": "round(\n sum(irate(ceph_rbd_read_latency_sum[30s])) /\n sum(irate(ceph_rbd_read_latency_count[30s]))\n)\n", "format": "time_series", "intervalFactor": 1, "legendFormat": "Read", @@ -416,7 +416,7 @@ ], "targets": [ { - "expr": "topk(10, (sort((irate(ceph_rbd_write_ops[30s]) + on (image, pool, namespace) irate(ceph_rbd_read_ops[30s])))))", + "expr": "topk(10,\n (\n sort((\n irate(ceph_rbd_write_ops[30s]) +\n on (image, pool, namespace) 
irate(ceph_rbd_read_ops[30s])\n ))\n )\n)\n", "format": "table", "instant": true, "intervalFactor": 1, @@ -518,7 +518,7 @@ ], "targets": [ { - "expr": "topk(10, sort(sum(irate(ceph_rbd_read_bytes[30s]) + irate(ceph_rbd_write_bytes[30s])) by (pool, image, namespace)))", + "expr": "topk(10,\n sort(\n sum(\n irate(ceph_rbd_read_bytes[30s]) + irate(ceph_rbd_write_bytes[30s])\n ) by (pool, image, namespace)\n )\n)\n", "format": "table", "instant": true, "intervalFactor": 1, @@ -620,7 +620,7 @@ ], "targets": [ { - "expr": "topk(10,\n sum(\n irate(ceph_rbd_write_latency_sum[30s]) / clamp_min(irate(ceph_rbd_write_latency_count[30s]), 1) +\n irate(ceph_rbd_read_latency_sum[30s]) / clamp_min(irate(ceph_rbd_read_latency_count[30s]), 1)\n ) by (pool, image, namespace)\n)", + "expr": "topk(10,\n sum(\n irate(ceph_rbd_write_latency_sum[30s]) / clamp_min(irate(ceph_rbd_write_latency_count[30s]), 1) +\n irate(ceph_rbd_read_latency_sum[30s]) / clamp_min(irate(ceph_rbd_read_latency_count[30s]), 1)\n ) by (pool, image, namespace)\n)\n", "format": "table", "instant": true, "intervalFactor": 1,