diff --git a/PendingReleaseNotes b/PendingReleaseNotes
index b61fb70c2acc6..572e45f438b2a 100644
--- a/PendingReleaseNotes
+++ b/PendingReleaseNotes
@@ -194,3 +194,12 @@
 If you deployed Luminous dev releases or 12.1.0 rc release and made use of
 the CRUSH choose_args feature, you need to remove all choose_args mappings
 from your CRUSH map before starting the upgrade.
+
+* The 'ceph health' structured output (JSON or XML) no longer contains
+  a 'timechecks' section describing the time sync status. This
+  information is now available via the 'ceph time-sync-status'
+  command.
+
+* Certain extra fields in the 'ceph health' structured output that
+  used to appear if the mons were low on disk space (which duplicated
+  the information in the normal health warning messages) are now gone.
diff --git a/qa/cephfs/overrides/whitelist_wrongly_marked_down.yaml b/qa/cephfs/overrides/whitelist_wrongly_marked_down.yaml
index 4f2d6df18864d..155ca72452963 100644
--- a/qa/cephfs/overrides/whitelist_wrongly_marked_down.yaml
+++ b/qa/cephfs/overrides/whitelist_wrongly_marked_down.yaml
@@ -1,7 +1,12 @@
 overrides:
   ceph:
     log-whitelist:
+      - overall HEALTH_
+      - (OSD_DOWN)
+      - (OSD_
       - wrongly marked me down
+# MDS daemon 'b' is not responding, replacing it as rank 0 with standby 'a'
+      - is not responding
     conf:
       mds:
         debug mds: 20
diff --git a/qa/suites/fs/basic_functional/overrides/whitelist_health.yaml b/qa/suites/fs/basic_functional/overrides/whitelist_health.yaml
new file mode 100644
index 0000000000000..b5bf1fa7b5eff
--- /dev/null
+++ b/qa/suites/fs/basic_functional/overrides/whitelist_health.yaml
@@ -0,0 +1,9 @@
+overrides:
+  ceph:
+    log-whitelist:
+      - overall HEALTH_
+      - (FS_DEGRADED)
+      - (MDS_FAILED)
+      - (MDS_DEGRADED)
+      - (FS_WITH_FAILED_MDS)
+      - (MDS_DAMAGE)
diff --git a/qa/suites/fs/basic_functional/tasks/cephfs_scrub_tests.yaml b/qa/suites/fs/basic_functional/tasks/cephfs_scrub_tests.yaml
index 3b2714689f0df..30b3a96e20760 100644
--- a/qa/suites/fs/basic_functional/tasks/cephfs_scrub_tests.yaml
+++ b/qa/suites/fs/basic_functional/tasks/cephfs_scrub_tests.yaml
@@ -4,6 +4,8 @@ overrides:
       - Scrub error on inode
       - Behind on trimming
       - Metadata damage detected
+      - overall HEALTH_
+      - (MDS_TRIM)
     conf:
       mds:
         mds log max segments: 1
diff --git a/qa/suites/fs/thrash/overrides/whitelist_health.yaml b/qa/suites/fs/thrash/overrides/whitelist_health.yaml
new file mode 100644
index 0000000000000..fc8119daca809
--- /dev/null
+++ b/qa/suites/fs/thrash/overrides/whitelist_health.yaml
@@ -0,0 +1,8 @@
+overrides:
+  ceph:
+    log-whitelist:
+      - overall HEALTH_
+      - (FS_DEGRADED)
+      - (MDS_FAILED)
+      - (MDS_DEGRADED)
+      - (FS_WITH_FAILED_MDS)
diff --git a/qa/suites/rados/basic-luminous/scrub_test.yaml b/qa/suites/rados/basic-luminous/scrub_test.yaml
index 07f039aae2915..d87f5bfdd35b4 100644
--- a/qa/suites/rados/basic-luminous/scrub_test.yaml
+++ b/qa/suites/rados/basic-luminous/scrub_test.yaml
@@ -15,6 +15,12 @@ overrides:
       - 'attr name mistmatch'
       - 'deep-scrub 1 missing, 0 inconsistent objects'
       - 'failed to pick suitable auth object'
+      - overall HEALTH_
+      - (OSDMAP_FLAGS)
+      - (OSD_
+      - (PG_
+      - (OSD_SCRUB_ERRORS)
+      - (TOO_FEW_PGS)
     conf:
       osd:
         osd deep scrub update digest min age: 0
diff --git a/qa/suites/rados/basic/tasks/rados_api_tests.yaml b/qa/suites/rados/basic/tasks/rados_api_tests.yaml
index b66423988d7cf..1d77207d2b43b 100644
--- a/qa/suites/rados/basic/tasks/rados_api_tests.yaml
+++ b/qa/suites/rados/basic/tasks/rados_api_tests.yaml
@@ -3,6 +3,11 @@ overrides:
     log-whitelist:
       - reached quota
       - wrongly marked me down
+      - overall HEALTH_
+      - (POOL_FULL)
+      - (SMALLER_PGP_NUM)
+      - (CACHE_POOL_NO_HIT_SET)
+      - (CACHE_POOL_NEAR_FULL)
 tasks:
 - workunit:
     clients:
diff --git a/qa/suites/rados/basic/tasks/rados_python.yaml b/qa/suites/rados/basic/tasks/rados_python.yaml
index d8b332b343dc6..aa22ccd16e1be 100644
--- a/qa/suites/rados/basic/tasks/rados_python.yaml
+++ b/qa/suites/rados/basic/tasks/rados_python.yaml
@@ -2,6 +2,11 @@ overrides:
   ceph:
     log-whitelist:
       - wrongly marked me down
+      - overall HEALTH_
+      - (OSDMAP_FLAGS)
+      - (PG_
+      - (OSD_
+      - (OBJECT_
 tasks:
 - workunit:
     clients:
diff --git a/qa/suites/rados/basic/tasks/rados_stress_watch.yaml b/qa/suites/rados/basic/tasks/rados_stress_watch.yaml
index 0e1ba010c5b4e..ded794c17412f 100644
--- a/qa/suites/rados/basic/tasks/rados_stress_watch.yaml
+++ b/qa/suites/rados/basic/tasks/rados_stress_watch.yaml
@@ -1,3 +1,9 @@
+overrides:
+  ceph:
+    log-whitelist:
+      - overall HEALTH_
+      - (CACHE_POOL_NO_HIT_SET)
+      - (TOO_FEW_PGS)
 tasks:
 - workunit:
     clients:
diff --git a/qa/suites/rados/basic/tasks/repair_test.yaml b/qa/suites/rados/basic/tasks/repair_test.yaml
index f69866994417e..8401c1a303f0c 100644
--- a/qa/suites/rados/basic/tasks/repair_test.yaml
+++ b/qa/suites/rados/basic/tasks/repair_test.yaml
@@ -17,6 +17,10 @@ overrides:
       - 'size 1 != size'
       - attr name mismatch
      - Regular scrub request, losing deep-scrub details
+      - overall HEALTH_
+      - (OSDMAP_FLAGS)
+      - (OSD_
+      - (PG_
     conf:
       osd:
         filestore debug inject read err: true
diff --git a/qa/suites/rados/mgr/tasks/failover.yaml b/qa/suites/rados/mgr/tasks/failover.yaml
index e02b8bf2cb078..fd5eb8515c905 100644
--- a/qa/suites/rados/mgr/tasks/failover.yaml
+++ b/qa/suites/rados/mgr/tasks/failover.yaml
@@ -4,7 +4,11 @@ tasks:
 - ceph:
     # tests may leave mgrs broken, so don't try and call into them
     # to invoke e.g. pg dump during teardown.
- wait-for-scrub: false + wait-for-scrub: false + log-whitelist: + - overall HEALTH_ + - (MGR_DOWN) + - (PG_ - cephfs_test_runner: modules: - tasks.mgr.test_failover diff --git a/qa/suites/rados/monthrash/thrashers/force-sync-many.yaml b/qa/suites/rados/monthrash/thrashers/force-sync-many.yaml index 2867f2db5ec7c..38570fcf615ef 100644 --- a/qa/suites/rados/monthrash/thrashers/force-sync-many.yaml +++ b/qa/suites/rados/monthrash/thrashers/force-sync-many.yaml @@ -1,3 +1,9 @@ +overrides: + ceph: + log-whitelist: + - overall HEALTH_ + - (MON_DOWN) + - (TOO_FEW_PGS) tasks: - mon_thrash: revive_delay: 90 diff --git a/qa/suites/rados/monthrash/thrashers/many.yaml b/qa/suites/rados/monthrash/thrashers/many.yaml index fe52bb2bbeb5e..e940c42ad7435 100644 --- a/qa/suites/rados/monthrash/thrashers/many.yaml +++ b/qa/suites/rados/monthrash/thrashers/many.yaml @@ -1,5 +1,8 @@ overrides: ceph: + log-whitelist: + - overall HEALTH_ + - (MON_DOWN) conf: osd: mon client ping interval: 4 diff --git a/qa/suites/rados/monthrash/thrashers/one.yaml b/qa/suites/rados/monthrash/thrashers/one.yaml index 2ce44c8601fa5..92c9eb3a808ff 100644 --- a/qa/suites/rados/monthrash/thrashers/one.yaml +++ b/qa/suites/rados/monthrash/thrashers/one.yaml @@ -1,3 +1,8 @@ +overrides: + ceph: + log-whitelist: + - overall HEALTH_ + - (MON_DOWN) tasks: - mon_thrash: revive_delay: 20 diff --git a/qa/suites/rados/monthrash/thrashers/sync-many.yaml b/qa/suites/rados/monthrash/thrashers/sync-many.yaml index 9868f18159f64..68020cd665143 100644 --- a/qa/suites/rados/monthrash/thrashers/sync-many.yaml +++ b/qa/suites/rados/monthrash/thrashers/sync-many.yaml @@ -1,5 +1,8 @@ overrides: ceph: + log-whitelist: + - overall HEALTH_ + - (MON_DOWN) conf: mon: paxos min: 10 diff --git a/qa/suites/rados/monthrash/thrashers/sync.yaml b/qa/suites/rados/monthrash/thrashers/sync.yaml index 1e7054c271d86..b07f8b511f65e 100644 --- a/qa/suites/rados/monthrash/thrashers/sync.yaml +++ b/qa/suites/rados/monthrash/thrashers/sync.yaml @@ -1,5 +1,8 @@ overrides: ceph: + log-whitelist: + - overall HEALTH_ + - (MON_DOWN) conf: mon: paxos min: 10 diff --git a/qa/suites/rados/monthrash/workloads/rados_api_tests.yaml b/qa/suites/rados/monthrash/workloads/rados_api_tests.yaml index b536557fdbac2..0834f9c34c183 100644 --- a/qa/suites/rados/monthrash/workloads/rados_api_tests.yaml +++ b/qa/suites/rados/monthrash/workloads/rados_api_tests.yaml @@ -2,6 +2,12 @@ overrides: ceph: log-whitelist: - reached quota + - overall HEALTH_ + - (CACHE_POOL_NO_HIT_SET) + - (POOL_FULL) + - (REQUEST_SLOW) + - (MON_DOWN) + - (PG_ conf: global: debug objecter: 20 diff --git a/qa/suites/rados/monthrash/workloads/rados_mon_workunits.yaml b/qa/suites/rados/monthrash/workloads/rados_mon_workunits.yaml index 31465cffe7127..86818b58dff70 100644 --- a/qa/suites/rados/monthrash/workloads/rados_mon_workunits.yaml +++ b/qa/suites/rados/monthrash/workloads/rados_mon_workunits.yaml @@ -2,6 +2,9 @@ overrides: ceph: log-whitelist: - wrongly marked me down + - overall HEALTH_ + - (PG_ + - (MON_DOWN) tasks: - workunit: clients: diff --git a/qa/suites/rados/multimon/tasks/mon_clock_no_skews.yaml b/qa/suites/rados/multimon/tasks/mon_clock_no_skews.yaml index e86bdde1d7d24..ec761e2955ee2 100644 --- a/qa/suites/rados/multimon/tasks/mon_clock_no_skews.yaml +++ b/qa/suites/rados/multimon/tasks/mon_clock_no_skews.yaml @@ -5,5 +5,7 @@ tasks: - slow request - .*clock.*skew.* - clocks not synchronized + - overall HEALTH_ + - (MON_CLOCK_SKEW) - mon_clock_skew_check: expect-skew: false diff --git 
a/qa/suites/rados/multimon/tasks/mon_clock_with_skews.yaml b/qa/suites/rados/multimon/tasks/mon_clock_with_skews.yaml index 1c6c1538b800e..2bba607152ea2 100644 --- a/qa/suites/rados/multimon/tasks/mon_clock_with_skews.yaml +++ b/qa/suites/rados/multimon/tasks/mon_clock_with_skews.yaml @@ -9,5 +9,7 @@ tasks: - slow request - .*clock.*skew.* - clocks not synchronized + - overall HEALTH_ + - (MON_CLOCK_SKEW) - mon_clock_skew_check: expect-skew: true diff --git a/qa/suites/rados/multimon/tasks/mon_recovery.yaml b/qa/suites/rados/multimon/tasks/mon_recovery.yaml index 94721ea53a495..4234bf73e6816 100644 --- a/qa/suites/rados/multimon/tasks/mon_recovery.yaml +++ b/qa/suites/rados/multimon/tasks/mon_recovery.yaml @@ -1,4 +1,7 @@ tasks: - install: - ceph: + log-whitelist: + - overall HEALTH_ + - (MON_DOWN) - mon_recovery: diff --git a/qa/suites/rados/objectstore/ceph_objectstore_tool.yaml b/qa/suites/rados/objectstore/ceph_objectstore_tool.yaml index 215d0f08f9b6b..2001faa3fe81b 100644 --- a/qa/suites/rados/objectstore/ceph_objectstore_tool.yaml +++ b/qa/suites/rados/objectstore/ceph_objectstore_tool.yaml @@ -12,5 +12,11 @@ tasks: global: osd max object name len: 460 osd max object namespace len: 64 + log-whitelist: + - overall HEALTH_ + - (OSDMAP_FLAGS) + - (OSD_ + - (PG_ + - (TOO_FEW_PGS) - ceph_objectstore_tool: objects: 20 diff --git a/qa/suites/rados/rest/mgr-restful.yaml b/qa/suites/rados/rest/mgr-restful.yaml index 571857c251107..5dd16fda288cf 100644 --- a/qa/suites/rados/rest/mgr-restful.yaml +++ b/qa/suites/rados/rest/mgr-restful.yaml @@ -3,6 +3,9 @@ roles: tasks: - install: - ceph: + log-whitelist: + - overall HEALTH_ + - (MGR_DOWN) - exec: mon.a: - ceph config-key put mgr/restful/x/server_addr 127.0.0.1 diff --git a/qa/suites/rados/singleton-bluestore/all/cephtool.yaml b/qa/suites/rados/singleton-bluestore/all/cephtool.yaml index 880628f4fd2b7..7e1a1f7b389a4 100644 --- a/qa/suites/rados/singleton-bluestore/all/cephtool.yaml +++ b/qa/suites/rados/singleton-bluestore/all/cephtool.yaml @@ -21,6 +21,11 @@ tasks: - must scrub before tier agent can activate - failsafe engaged, dropping updates - failsafe disengaged, no longer dropping updates + - overall HEALTH_ + - (OSDMAP_FLAGS) + - (OSD_ + - (PG_ + - (SMALLER_PG_NUM) - workunit: clients: all: diff --git a/qa/suites/rados/singleton-nomsgr/all/admin_socket_output.yaml b/qa/suites/rados/singleton-nomsgr/all/admin_socket_output.yaml index 969c40902fb91..3aaca87594032 100644 --- a/qa/suites/rados/singleton-nomsgr/all/admin_socket_output.yaml +++ b/qa/suites/rados/singleton-nomsgr/all/admin_socket_output.yaml @@ -5,6 +5,10 @@ overrides: log-whitelist: - MDS in read-only mode - force file system read-only + - overall HEALTH_ + - (OSDMAP_FLAGS) + - (OSD_FULL) + - (MDS_READ_ONLY) tasks: - install: - ceph: diff --git a/qa/suites/rados/singleton-nomsgr/all/cache-fs-trunc.yaml b/qa/suites/rados/singleton-nomsgr/all/cache-fs-trunc.yaml index 5009ee617035c..ac64165aaaa6a 100644 --- a/qa/suites/rados/singleton-nomsgr/all/cache-fs-trunc.yaml +++ b/qa/suites/rados/singleton-nomsgr/all/cache-fs-trunc.yaml @@ -3,6 +3,9 @@ roles: tasks: - install: - ceph: + log-whitelist: + - overall HEALTH_ + - (CACHE_POOL_NO_HIT_SET) conf: global: osd max object name len: 460 diff --git a/qa/suites/rados/singleton-nomsgr/all/export-after-evict.yaml b/qa/suites/rados/singleton-nomsgr/all/export-after-evict.yaml index e0badd4d3afad..1b777ab0f00b7 100644 --- a/qa/suites/rados/singleton-nomsgr/all/export-after-evict.yaml +++ 
b/qa/suites/rados/singleton-nomsgr/all/export-after-evict.yaml @@ -8,6 +8,9 @@ roles: tasks: - install: - ceph: + log-whitelist: + - overall HEALTH_ + - (CACHE_POOL_NO_HIT_SET) conf: global: osd max object name len: 460 diff --git a/qa/suites/rados/singleton-nomsgr/all/full-tiering.yaml b/qa/suites/rados/singleton-nomsgr/all/full-tiering.yaml index 9dc1fe7dcc9bf..5eb42f4dd6390 100644 --- a/qa/suites/rados/singleton-nomsgr/all/full-tiering.yaml +++ b/qa/suites/rados/singleton-nomsgr/all/full-tiering.yaml @@ -5,6 +5,10 @@ overrides: ceph: log-whitelist: - is full + - overall HEALTH_ + - (POOL_FULL) + - (POOL_NEAR_FULL) + - (CACHE_POOL_NO_HIT_SET) tasks: - install: - ceph: diff --git a/qa/suites/rados/singleton-nomsgr/all/health-warnings.yaml b/qa/suites/rados/singleton-nomsgr/all/health-warnings.yaml index 4c8228b0cd948..749bd8d39c3be 100644 --- a/qa/suites/rados/singleton-nomsgr/all/health-warnings.yaml +++ b/qa/suites/rados/singleton-nomsgr/all/health-warnings.yaml @@ -10,6 +10,10 @@ tasks: osd max object namespace len: 64 log-whitelist: - wrongly marked me down + - overall HEALTH_ + - (OSDMAP_FLAGS) + - (OSD_ + - (PG_ - workunit: clients: all: diff --git a/qa/suites/rados/singleton-nomsgr/all/multi-backfill-reject.yaml b/qa/suites/rados/singleton-nomsgr/all/multi-backfill-reject.yaml index b73899738e674..cadf3044a1d12 100644 --- a/qa/suites/rados/singleton-nomsgr/all/multi-backfill-reject.yaml +++ b/qa/suites/rados/singleton-nomsgr/all/multi-backfill-reject.yaml @@ -11,6 +11,11 @@ roles: tasks: - install: - ceph: + log-whitelist: + - overall HEALTH_ + - (PG_ + - (OSD_ + - (OBJECT_ conf: osd: osd debug reject backfill probability: .3 diff --git a/qa/suites/rados/singleton-nomsgr/all/valgrind-leaks.yaml b/qa/suites/rados/singleton-nomsgr/all/valgrind-leaks.yaml index e5d5702a32b65..65af1a2e817c5 100644 --- a/qa/suites/rados/singleton-nomsgr/all/valgrind-leaks.yaml +++ b/qa/suites/rados/singleton-nomsgr/all/valgrind-leaks.yaml @@ -7,6 +7,9 @@ overrides: flavor: notcmalloc debuginfo: true ceph: + log-whitelist: + - overall HEALTH_ + - (PG_ conf: global: osd heartbeat grace: 40 diff --git a/qa/suites/rados/singleton/all/divergent_priors.yaml b/qa/suites/rados/singleton/all/divergent_priors.yaml index bb7c2b57f50f8..f15fb88961587 100644 --- a/qa/suites/rados/singleton/all/divergent_priors.yaml +++ b/qa/suites/rados/singleton/all/divergent_priors.yaml @@ -12,6 +12,12 @@ openstack: overrides: ceph: + log-whitelist: + - overall HEALTH_ + - (OSDMAP_FLAGS) + - (OSD_ + - (PG_ + - (OBJECT_DEGRADED) conf: osd: debug osd: 5 diff --git a/qa/suites/rados/singleton/all/divergent_priors2.yaml b/qa/suites/rados/singleton/all/divergent_priors2.yaml index ab749f1b516ec..90d8b1838b986 100644 --- a/qa/suites/rados/singleton/all/divergent_priors2.yaml +++ b/qa/suites/rados/singleton/all/divergent_priors2.yaml @@ -12,6 +12,12 @@ openstack: overrides: ceph: + log-whitelist: + - overall HEALTH_ + - (OSDMAP_FLAGS) + - (OSD_ + - (PG_ + - (OBJECT_DEGRADED) conf: osd: debug osd: 5 diff --git a/qa/suites/rados/singleton/all/dump-stuck.yaml b/qa/suites/rados/singleton/all/dump-stuck.yaml index 7d3b443021bbb..f3900e121fe72 100644 --- a/qa/suites/rados/singleton/all/dump-stuck.yaml +++ b/qa/suites/rados/singleton/all/dump-stuck.yaml @@ -11,5 +11,9 @@ tasks: - install: - ceph: log-whitelist: - - wrongly marked me down + - wrongly marked me down + - overall HEALTH_ + - (OSDMAP_FLAGS) + - (OSD_ + - (PG_ - dump_stuck: diff --git a/qa/suites/rados/singleton/all/ec-lost-unfound.yaml 
b/qa/suites/rados/singleton/all/ec-lost-unfound.yaml index 6ceefe1222e35..e095fd0d58690 100644 --- a/qa/suites/rados/singleton/all/ec-lost-unfound.yaml +++ b/qa/suites/rados/singleton/all/ec-lost-unfound.yaml @@ -15,5 +15,10 @@ tasks: - install: - ceph: log-whitelist: - - objects unfound and apparently lost + - objects unfound and apparently lost + - overall HEALTH_ + - (OSDMAP_FLAGS) + - (OSD_ + - (PG_ + - (OBJECT_ - ec_lost_unfound: diff --git a/qa/suites/rados/singleton/all/lost-unfound-delete.yaml b/qa/suites/rados/singleton/all/lost-unfound-delete.yaml index 15f4710bd9bf4..5502b5c9b0ffc 100644 --- a/qa/suites/rados/singleton/all/lost-unfound-delete.yaml +++ b/qa/suites/rados/singleton/all/lost-unfound-delete.yaml @@ -14,5 +14,10 @@ tasks: - install: - ceph: log-whitelist: - - objects unfound and apparently lost + - objects unfound and apparently lost + - overall HEALTH_ + - (OSDMAP_FLAGS) + - (OSD_ + - (PG_ + - (OBJECT_ - rep_lost_unfound_delete: diff --git a/qa/suites/rados/singleton/all/lost-unfound.yaml b/qa/suites/rados/singleton/all/lost-unfound.yaml index 3f22ba3c0136a..bb0bb2c0afe49 100644 --- a/qa/suites/rados/singleton/all/lost-unfound.yaml +++ b/qa/suites/rados/singleton/all/lost-unfound.yaml @@ -14,5 +14,10 @@ tasks: - install: - ceph: log-whitelist: - - objects unfound and apparently lost + - objects unfound and apparently lost + - overall HEALTH_ + - (OSDMAP_FLAGS) + - (OSD_ + - (PG_ + - (OBJECT_ - lost_unfound: diff --git a/qa/suites/rados/singleton/all/mon-thrasher.yaml b/qa/suites/rados/singleton/all/mon-thrasher.yaml index 1b4622998f9ba..66a1e905f3b83 100644 --- a/qa/suites/rados/singleton/all/mon-thrasher.yaml +++ b/qa/suites/rados/singleton/all/mon-thrasher.yaml @@ -13,6 +13,10 @@ openstack: tasks: - install: - ceph: + log-whitelist: + - overall HEALTH_ + - (MON_DOWN) + - (PG_ - mon_thrash: revive_delay: 20 thrash_delay: 1 diff --git a/qa/suites/rados/singleton/all/osd-backfill.yaml b/qa/suites/rados/singleton/all/osd-backfill.yaml index f84a0df8d6600..84e2273d3f883 100644 --- a/qa/suites/rados/singleton/all/osd-backfill.yaml +++ b/qa/suites/rados/singleton/all/osd-backfill.yaml @@ -14,7 +14,12 @@ tasks: - install: - ceph: log-whitelist: - - wrongly marked me down + - wrongly marked me down + - overall HEALTH_ + - (OSDMAP_FLAGS) + - (OSD_ + - (PG_ + - (OBJECT_ conf: osd: osd min pg log entries: 5 diff --git a/qa/suites/rados/singleton/all/osd-recovery-incomplete.yaml b/qa/suites/rados/singleton/all/osd-recovery-incomplete.yaml index 773cb2480a6fe..60789d5ca68c6 100644 --- a/qa/suites/rados/singleton/all/osd-recovery-incomplete.yaml +++ b/qa/suites/rados/singleton/all/osd-recovery-incomplete.yaml @@ -15,7 +15,12 @@ tasks: - install: - ceph: log-whitelist: - - wrongly marked me down + - wrongly marked me down + - overall HEALTH_ + - (OSDMAP_FLAGS) + - (OSD_ + - (PG_ + - (OBJECT_ conf: osd: osd min pg log entries: 5 diff --git a/qa/suites/rados/singleton/all/osd-recovery.yaml b/qa/suites/rados/singleton/all/osd-recovery.yaml index 214d7f20cc74c..d6e5e957f24c6 100644 --- a/qa/suites/rados/singleton/all/osd-recovery.yaml +++ b/qa/suites/rados/singleton/all/osd-recovery.yaml @@ -14,7 +14,12 @@ tasks: - install: - ceph: log-whitelist: - - wrongly marked me down + - wrongly marked me down + - overall HEALTH_ + - (OSDMAP_FLAGS) + - (OSD_ + - (PG_ + - (OBJECT_DEGRADED) conf: osd: osd min pg log entries: 5 diff --git a/qa/suites/rados/singleton/all/peer.yaml b/qa/suites/rados/singleton/all/peer.yaml index 6e22b44563e9d..e87cd543ce6c9 100644 --- 
a/qa/suites/rados/singleton/all/peer.yaml +++ b/qa/suites/rados/singleton/all/peer.yaml @@ -17,5 +17,9 @@ tasks: global: osd pool default min size : 1 log-whitelist: - - objects unfound and apparently lost + - objects unfound and apparently lost + - overall HEALTH_ + - (OSDMAP_FLAGS) + - (OSD_ + - (PG_ - peer: diff --git a/qa/suites/rados/singleton/all/pg-removal-interruption.yaml b/qa/suites/rados/singleton/all/pg-removal-interruption.yaml index f7e61c962633f..856b08dd43735 100644 --- a/qa/suites/rados/singleton/all/pg-removal-interruption.yaml +++ b/qa/suites/rados/singleton/all/pg-removal-interruption.yaml @@ -13,8 +13,12 @@ tasks: - install: - ceph: log-whitelist: - - wrongly marked me down - - slow request + - wrongly marked me down + - slow request + - overall HEALTH_ + - (OSDMAP_FLAGS) + - (OSD_ + - (PG_ - exec: client.0: - sudo ceph osd pool create foo 128 128 diff --git a/qa/suites/rados/singleton/all/radostool.yaml b/qa/suites/rados/singleton/all/radostool.yaml index 8bc9dbdcd0f06..700b3a33a3cfb 100644 --- a/qa/suites/rados/singleton/all/radostool.yaml +++ b/qa/suites/rados/singleton/all/radostool.yaml @@ -17,6 +17,8 @@ tasks: - had wrong client addr - had wrong cluster addr - reached quota + - overall HEALTH_ + - (POOL_FULL) - workunit: clients: all: diff --git a/qa/suites/rados/singleton/all/rebuild-mondb.yaml b/qa/suites/rados/singleton/all/rebuild-mondb.yaml index c3be13ae6a2cf..6847cef8da29a 100644 --- a/qa/suites/rados/singleton/all/rebuild-mondb.yaml +++ b/qa/suites/rados/singleton/all/rebuild-mondb.yaml @@ -15,7 +15,12 @@ tasks: - install: - ceph: log-whitelist: - - no reply from + - no reply from + - overall HEALTH_ + - (MON_DOWN) + - (OSDMAP_FLAGS) + - (OSD_ + - (PG_ - full_sequential: - radosbench: clients: [client.0] diff --git a/qa/suites/rados/singleton/all/resolve_stuck_peering.yaml b/qa/suites/rados/singleton/all/resolve_stuck_peering.yaml index c64593212a5f2..97da137909608 100644 --- a/qa/suites/rados/singleton/all/resolve_stuck_peering.yaml +++ b/qa/suites/rados/singleton/all/resolve_stuck_peering.yaml @@ -6,5 +6,11 @@ tasks: - install: - ceph: fs: xfs + log-whitelist: + - overall HEALTH_ + - (OSDMAP_FLAGS) + - (OSD_ + - (PG_ + - (OBJECT_DEGRADED) - resolve_stuck_peering: diff --git a/qa/suites/rados/singleton/all/rest-api.yaml b/qa/suites/rados/singleton/all/rest-api.yaml index cbd90e4097da0..77c881b0ee502 100644 --- a/qa/suites/rados/singleton/all/rest-api.yaml +++ b/qa/suites/rados/singleton/all/rest-api.yaml @@ -16,8 +16,13 @@ tasks: - install: - ceph: log-whitelist: - - wrongly marked me down - - had wrong client addr + - wrongly marked me down + - had wrong client addr + - overall HEALTH_ + - (OSDMAP_FLAGS) + - (OSD_ + - (PG_ + - (OBJECT_DEGRADED) conf: client.rest0: debug ms: 1 diff --git a/qa/suites/rados/singleton/all/thrash-eio.yaml b/qa/suites/rados/singleton/all/thrash-eio.yaml index a70636549cd53..6ff629033cd07 100644 --- a/qa/suites/rados/singleton/all/thrash-eio.yaml +++ b/qa/suites/rados/singleton/all/thrash-eio.yaml @@ -24,6 +24,11 @@ tasks: - wrongly marked me down - missing primary copy of - objects unfound and apparently lost + - overall HEALTH_ + - (OSDMAP_FLAGS) + - (REQUEST_SLOW) + - (PG_ + - (OSD_ - thrashosds: op_delay: 30 clean_interval: 120 diff --git a/qa/suites/rados/singleton/all/thrash-rados/+ b/qa/suites/rados/singleton/all/thrash-rados/+ new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/qa/suites/rados/singleton/all/thrash-rados.yaml b/qa/suites/rados/singleton/all/thrash-rados/thrash-rados.yaml similarity 
index 92% rename from qa/suites/rados/singleton/all/thrash-rados.yaml rename to qa/suites/rados/singleton/all/thrash-rados/thrash-rados.yaml index 49e3e8799c143..f61897eaabae9 100644 --- a/qa/suites/rados/singleton/all/thrash-rados.yaml +++ b/qa/suites/rados/singleton/all/thrash-rados/thrash-rados.yaml @@ -16,7 +16,7 @@ tasks: - install: - ceph: log-whitelist: - - wrongly marked me down + - wrongly marked me down - thrashosds: op_delay: 30 clean_interval: 120 diff --git a/qa/suites/rados/singleton/all/thrash-rados/thrashosds-health.yaml b/qa/suites/rados/singleton/all/thrash-rados/thrashosds-health.yaml new file mode 120000 index 0000000000000..0b1d7b060a925 --- /dev/null +++ b/qa/suites/rados/singleton/all/thrash-rados/thrashosds-health.yaml @@ -0,0 +1 @@ +../../../../../tasks/thrashosds-health.yaml \ No newline at end of file diff --git a/qa/suites/rados/singleton/all/thrash_cache_writeback_proxy_none.yaml b/qa/suites/rados/singleton/all/thrash_cache_writeback_proxy_none.yaml index 1875da409a25e..02fee3e88ea7d 100644 --- a/qa/suites/rados/singleton/all/thrash_cache_writeback_proxy_none.yaml +++ b/qa/suites/rados/singleton/all/thrash_cache_writeback_proxy_none.yaml @@ -16,8 +16,10 @@ tasks: - install: - ceph: log-whitelist: - - wrongly marked me down - - slow request + - wrongly marked me down + - slow request + - overall HEALTH_ + - (CACHE_POOL_ - exec: client.0: - sudo ceph osd pool create base 4 diff --git a/qa/suites/rados/singleton/all/watch-notify-same-primary.yaml b/qa/suites/rados/singleton/all/watch-notify-same-primary.yaml index ad1fd17d5f589..3efdb955fe62c 100644 --- a/qa/suites/rados/singleton/all/watch-notify-same-primary.yaml +++ b/qa/suites/rados/singleton/all/watch-notify-same-primary.yaml @@ -22,6 +22,11 @@ tasks: debug objecter: 20 debug rados: 20 log-whitelist: - - objects unfound and apparently lost + - objects unfound and apparently lost + - overall HEALTH_ + - (OSDMAP_FLAGS) + - (OSD_ + - (PG_ + - (OBJECT_DEGRADED) - watch_notify_same_primary: clients: [client.0] diff --git a/qa/suites/rados/thrash-erasure-code-big/thrashosds-health.yaml b/qa/suites/rados/thrash-erasure-code-big/thrashosds-health.yaml new file mode 120000 index 0000000000000..ebf7f34f39bcf --- /dev/null +++ b/qa/suites/rados/thrash-erasure-code-big/thrashosds-health.yaml @@ -0,0 +1 @@ +../../../tasks/thrashosds-health.yaml \ No newline at end of file diff --git a/qa/suites/rados/thrash-erasure-code-isa/thrashosds-health.yaml b/qa/suites/rados/thrash-erasure-code-isa/thrashosds-health.yaml new file mode 120000 index 0000000000000..ebf7f34f39bcf --- /dev/null +++ b/qa/suites/rados/thrash-erasure-code-isa/thrashosds-health.yaml @@ -0,0 +1 @@ +../../../tasks/thrashosds-health.yaml \ No newline at end of file diff --git a/qa/suites/rados/thrash-erasure-code-overwrites/thrashosds-health.yaml b/qa/suites/rados/thrash-erasure-code-overwrites/thrashosds-health.yaml new file mode 120000 index 0000000000000..ebf7f34f39bcf --- /dev/null +++ b/qa/suites/rados/thrash-erasure-code-overwrites/thrashosds-health.yaml @@ -0,0 +1 @@ +../../../tasks/thrashosds-health.yaml \ No newline at end of file diff --git a/qa/suites/rados/thrash-erasure-code-shec/thrashosds-health.yaml b/qa/suites/rados/thrash-erasure-code-shec/thrashosds-health.yaml new file mode 120000 index 0000000000000..ebf7f34f39bcf --- /dev/null +++ b/qa/suites/rados/thrash-erasure-code-shec/thrashosds-health.yaml @@ -0,0 +1 @@ +../../../tasks/thrashosds-health.yaml \ No newline at end of file diff --git 
a/qa/suites/rados/thrash-erasure-code/thrashosds-health.yaml b/qa/suites/rados/thrash-erasure-code/thrashosds-health.yaml new file mode 120000 index 0000000000000..ebf7f34f39bcf --- /dev/null +++ b/qa/suites/rados/thrash-erasure-code/thrashosds-health.yaml @@ -0,0 +1 @@ +../../../tasks/thrashosds-health.yaml \ No newline at end of file diff --git a/qa/suites/rados/thrash-luminous/thrashosds-health.yaml b/qa/suites/rados/thrash-luminous/thrashosds-health.yaml new file mode 120000 index 0000000000000..ebf7f34f39bcf --- /dev/null +++ b/qa/suites/rados/thrash-luminous/thrashosds-health.yaml @@ -0,0 +1 @@ +../../../tasks/thrashosds-health.yaml \ No newline at end of file diff --git a/qa/suites/rados/thrash/thrashosds-health.yaml b/qa/suites/rados/thrash/thrashosds-health.yaml new file mode 120000 index 0000000000000..ebf7f34f39bcf --- /dev/null +++ b/qa/suites/rados/thrash/thrashosds-health.yaml @@ -0,0 +1 @@ +../../../tasks/thrashosds-health.yaml \ No newline at end of file diff --git a/qa/suites/rados/upgrade/jewel-x-singleton/thrashosds-health.yaml b/qa/suites/rados/upgrade/jewel-x-singleton/thrashosds-health.yaml new file mode 120000 index 0000000000000..e0426dbe49935 --- /dev/null +++ b/qa/suites/rados/upgrade/jewel-x-singleton/thrashosds-health.yaml @@ -0,0 +1 @@ +../../../../tasks/thrashosds-health.yaml \ No newline at end of file diff --git a/qa/suites/rados/verify/d-thrash/default/+ b/qa/suites/rados/verify/d-thrash/default/+ new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/qa/suites/rados/verify/d-thrash/default.yaml b/qa/suites/rados/verify/d-thrash/default/default.yaml similarity index 100% rename from qa/suites/rados/verify/d-thrash/default.yaml rename to qa/suites/rados/verify/d-thrash/default/default.yaml diff --git a/qa/suites/rados/verify/d-thrash/default/thrashosds-health.yaml b/qa/suites/rados/verify/d-thrash/default/thrashosds-health.yaml new file mode 120000 index 0000000000000..0b1d7b060a925 --- /dev/null +++ b/qa/suites/rados/verify/d-thrash/default/thrashosds-health.yaml @@ -0,0 +1 @@ +../../../../../tasks/thrashosds-health.yaml \ No newline at end of file diff --git a/qa/suites/rados/verify/tasks/mon_recovery.yaml b/qa/suites/rados/verify/tasks/mon_recovery.yaml index 6986303409ee7..412db863022be 100644 --- a/qa/suites/rados/verify/tasks/mon_recovery.yaml +++ b/qa/suites/rados/verify/tasks/mon_recovery.yaml @@ -1,2 +1,9 @@ +overrides: + ceph: + log-whitelist: + - overall HEALTH_ + - (MON_DOWN) + - (OSDMAP_FLAGS) + - (SMALLER_PGP_NUM) tasks: - mon_recovery: diff --git a/qa/suites/rados/verify/tasks/rados_api_tests.yaml b/qa/suites/rados/verify/tasks/rados_api_tests.yaml index 11e3858f6a7da..7c06248d25492 100644 --- a/qa/suites/rados/verify/tasks/rados_api_tests.yaml +++ b/qa/suites/rados/verify/tasks/rados_api_tests.yaml @@ -2,6 +2,12 @@ overrides: ceph: log-whitelist: - reached quota + - overall HEALTH_ + - (CACHE_POOL_NO_HIT_SET) + - (POOL_FULL) + - (SMALLER_PGP_NUM) + - (REQUEST_SLOW) + - (CACHE_POOL_NEAR_FULL) conf: client: debug ms: 1 diff --git a/qa/suites/rbd/basic/cachepool/small.yaml b/qa/suites/rbd/basic/cachepool/small.yaml index 8262be3304469..5c8f924abadcd 100644 --- a/qa/suites/rbd/basic/cachepool/small.yaml +++ b/qa/suites/rbd/basic/cachepool/small.yaml @@ -1,3 +1,8 @@ +overrides: + ceph: + log-whitelist: + - overall HEALTH_ + - (CACHE_POOL_NO_HIT_SET) tasks: - exec: client.0: diff --git a/qa/suites/rbd/basic/tasks/rbd_api_tests_old_format.yaml b/qa/suites/rbd/basic/tasks/rbd_api_tests_old_format.yaml index a98768540ba97..9d34002a15882 
100644 --- a/qa/suites/rbd/basic/tasks/rbd_api_tests_old_format.yaml +++ b/qa/suites/rbd/basic/tasks/rbd_api_tests_old_format.yaml @@ -1,3 +1,8 @@ +overrides: + ceph: + log-whitelist: + - overall HEALTH_ + - (CACHE_POOL_NO_HIT_SET) tasks: - workunit: clients: diff --git a/qa/suites/rbd/basic/tasks/rbd_python_api_tests_old_format.yaml b/qa/suites/rbd/basic/tasks/rbd_python_api_tests_old_format.yaml index 263b784e27dd2..f60a5ffa7e1b2 100644 --- a/qa/suites/rbd/basic/tasks/rbd_python_api_tests_old_format.yaml +++ b/qa/suites/rbd/basic/tasks/rbd_python_api_tests_old_format.yaml @@ -1,3 +1,7 @@ +overrides: + ceph: + log-whitelist: + - (REQUEST_SLOW) tasks: - workunit: clients: diff --git a/qa/suites/rbd/cli/pool/ec-data-pool.yaml b/qa/suites/rbd/cli/pool/ec-data-pool.yaml index 75dfc6a45534e..32dd2ab90792e 100644 --- a/qa/suites/rbd/cli/pool/ec-data-pool.yaml +++ b/qa/suites/rbd/cli/pool/ec-data-pool.yaml @@ -11,6 +11,9 @@ overrides: bdev_inject_crash_probability: .5 ceph: fs: xfs + log-whitelist: + - overall HEALTH_ + - (CACHE_POOL_NO_HIT_SET) conf: client: rbd default data pool: datapool diff --git a/qa/suites/rbd/cli/pool/small-cache-pool.yaml b/qa/suites/rbd/cli/pool/small-cache-pool.yaml index 8262be3304469..5c8f924abadcd 100644 --- a/qa/suites/rbd/cli/pool/small-cache-pool.yaml +++ b/qa/suites/rbd/cli/pool/small-cache-pool.yaml @@ -1,3 +1,8 @@ +overrides: + ceph: + log-whitelist: + - overall HEALTH_ + - (CACHE_POOL_NO_HIT_SET) tasks: - exec: client.0: diff --git a/qa/suites/rbd/librbd/pool/small-cache-pool.yaml b/qa/suites/rbd/librbd/pool/small-cache-pool.yaml index 8262be3304469..5c8f924abadcd 100644 --- a/qa/suites/rbd/librbd/pool/small-cache-pool.yaml +++ b/qa/suites/rbd/librbd/pool/small-cache-pool.yaml @@ -1,3 +1,8 @@ +overrides: + ceph: + log-whitelist: + - overall HEALTH_ + - (CACHE_POOL_NO_HIT_SET) tasks: - exec: client.0: diff --git a/qa/suites/rbd/librbd/workloads/c_api_tests.yaml b/qa/suites/rbd/librbd/workloads/c_api_tests.yaml index 188ddc56c6078..b70e8d52b80af 100644 --- a/qa/suites/rbd/librbd/workloads/c_api_tests.yaml +++ b/qa/suites/rbd/librbd/workloads/c_api_tests.yaml @@ -1,3 +1,8 @@ +overrides: + ceph: + log-whitelist: + - overall HEALTH_ + - (CACHE_POOL_NO_HIT_SET) tasks: - workunit: clients: diff --git a/qa/suites/rbd/librbd/workloads/c_api_tests_with_defaults.yaml b/qa/suites/rbd/librbd/workloads/c_api_tests_with_defaults.yaml index ee1de610a91c0..c2af3573dc5df 100644 --- a/qa/suites/rbd/librbd/workloads/c_api_tests_with_defaults.yaml +++ b/qa/suites/rbd/librbd/workloads/c_api_tests_with_defaults.yaml @@ -1,3 +1,8 @@ +overrides: + ceph: + log-whitelist: + - overall HEALTH_ + - (CACHE_POOL_NO_HIT_SET) tasks: - workunit: clients: diff --git a/qa/suites/rbd/librbd/workloads/c_api_tests_with_journaling.yaml b/qa/suites/rbd/librbd/workloads/c_api_tests_with_journaling.yaml index eda2b5e8a607b..f1121a4039658 100644 --- a/qa/suites/rbd/librbd/workloads/c_api_tests_with_journaling.yaml +++ b/qa/suites/rbd/librbd/workloads/c_api_tests_with_journaling.yaml @@ -1,3 +1,8 @@ +overrides: + ceph: + log-whitelist: + - overall HEALTH_ + - (CACHE_POOL_NO_HIT_SET) tasks: - workunit: clients: diff --git a/qa/suites/rbd/nbd/thrashosds-health.yaml b/qa/suites/rbd/nbd/thrashosds-health.yaml new file mode 120000 index 0000000000000..ebf7f34f39bcf --- /dev/null +++ b/qa/suites/rbd/nbd/thrashosds-health.yaml @@ -0,0 +1 @@ +../../../tasks/thrashosds-health.yaml \ No newline at end of file diff --git a/qa/suites/rbd/qemu/pool/ec-cache-pool.yaml b/qa/suites/rbd/qemu/pool/ec-cache-pool.yaml 
index 80379a1026b78..09e8bc3f24b6c 100644 --- a/qa/suites/rbd/qemu/pool/ec-cache-pool.yaml +++ b/qa/suites/rbd/qemu/pool/ec-cache-pool.yaml @@ -1,3 +1,8 @@ +overrides: + ceph: + log-whitelist: + - overall HEALTH_ + - (CACHE_POOL_NO_HIT_SET) tasks: - exec: client.0: diff --git a/qa/suites/rbd/qemu/pool/small-cache-pool.yaml b/qa/suites/rbd/qemu/pool/small-cache-pool.yaml index 8262be3304469..5c8f924abadcd 100644 --- a/qa/suites/rbd/qemu/pool/small-cache-pool.yaml +++ b/qa/suites/rbd/qemu/pool/small-cache-pool.yaml @@ -1,3 +1,8 @@ +overrides: + ceph: + log-whitelist: + - overall HEALTH_ + - (CACHE_POOL_NO_HIT_SET) tasks: - exec: client.0: diff --git a/qa/suites/rbd/singleton/all/rbd_mirror.yaml b/qa/suites/rbd/singleton/all/rbd_mirror.yaml index 21624164beba5..5006dd8017b39 100644 --- a/qa/suites/rbd/singleton/all/rbd_mirror.yaml +++ b/qa/suites/rbd/singleton/all/rbd_mirror.yaml @@ -4,6 +4,9 @@ tasks: - install: - ceph: fs: xfs + log-whitelist: + - overall HEALTH_ + - (CACHE_POOL_NO_HIT_SET) - workunit: clients: all: [rbd/test_rbd_mirror.sh] diff --git a/qa/suites/rbd/thrash/thrashosds-health.yaml b/qa/suites/rbd/thrash/thrashosds-health.yaml new file mode 120000 index 0000000000000..ebf7f34f39bcf --- /dev/null +++ b/qa/suites/rbd/thrash/thrashosds-health.yaml @@ -0,0 +1 @@ +../../../tasks/thrashosds-health.yaml \ No newline at end of file diff --git a/qa/suites/rbd/thrash/workloads/rbd_api_tests.yaml b/qa/suites/rbd/thrash/workloads/rbd_api_tests.yaml index ee1de610a91c0..c2af3573dc5df 100644 --- a/qa/suites/rbd/thrash/workloads/rbd_api_tests.yaml +++ b/qa/suites/rbd/thrash/workloads/rbd_api_tests.yaml @@ -1,3 +1,8 @@ +overrides: + ceph: + log-whitelist: + - overall HEALTH_ + - (CACHE_POOL_NO_HIT_SET) tasks: - workunit: clients: diff --git a/qa/suites/rbd/thrash/workloads/rbd_api_tests_copy_on_read.yaml b/qa/suites/rbd/thrash/workloads/rbd_api_tests_copy_on_read.yaml index cfa0a25a70d0f..7f64ef3f13618 100644 --- a/qa/suites/rbd/thrash/workloads/rbd_api_tests_copy_on_read.yaml +++ b/qa/suites/rbd/thrash/workloads/rbd_api_tests_copy_on_read.yaml @@ -7,6 +7,9 @@ tasks: RBD_FEATURES: "61" overrides: ceph: + log-whitelist: + - overall HEALTH_ + - (CACHE_POOL_NO_HIT_SET) conf: client: rbd clone copy on read: true diff --git a/qa/suites/rbd/thrash/workloads/rbd_api_tests_journaling.yaml b/qa/suites/rbd/thrash/workloads/rbd_api_tests_journaling.yaml index eda2b5e8a607b..f1121a4039658 100644 --- a/qa/suites/rbd/thrash/workloads/rbd_api_tests_journaling.yaml +++ b/qa/suites/rbd/thrash/workloads/rbd_api_tests_journaling.yaml @@ -1,3 +1,8 @@ +overrides: + ceph: + log-whitelist: + - overall HEALTH_ + - (CACHE_POOL_NO_HIT_SET) tasks: - workunit: clients: diff --git a/qa/suites/rbd/thrash/workloads/rbd_api_tests_no_locking.yaml b/qa/suites/rbd/thrash/workloads/rbd_api_tests_no_locking.yaml index 188ddc56c6078..b70e8d52b80af 100644 --- a/qa/suites/rbd/thrash/workloads/rbd_api_tests_no_locking.yaml +++ b/qa/suites/rbd/thrash/workloads/rbd_api_tests_no_locking.yaml @@ -1,3 +1,8 @@ +overrides: + ceph: + log-whitelist: + - overall HEALTH_ + - (CACHE_POOL_NO_HIT_SET) tasks: - workunit: clients: diff --git a/qa/suites/rbd/valgrind/workloads/c_api_tests.yaml b/qa/suites/rbd/valgrind/workloads/c_api_tests.yaml index 188ddc56c6078..b70e8d52b80af 100644 --- a/qa/suites/rbd/valgrind/workloads/c_api_tests.yaml +++ b/qa/suites/rbd/valgrind/workloads/c_api_tests.yaml @@ -1,3 +1,8 @@ +overrides: + ceph: + log-whitelist: + - overall HEALTH_ + - (CACHE_POOL_NO_HIT_SET) tasks: - workunit: clients: diff --git 
a/qa/suites/rbd/valgrind/workloads/c_api_tests_with_defaults.yaml b/qa/suites/rbd/valgrind/workloads/c_api_tests_with_defaults.yaml index ee1de610a91c0..c2af3573dc5df 100644 --- a/qa/suites/rbd/valgrind/workloads/c_api_tests_with_defaults.yaml +++ b/qa/suites/rbd/valgrind/workloads/c_api_tests_with_defaults.yaml @@ -1,3 +1,8 @@ +overrides: + ceph: + log-whitelist: + - overall HEALTH_ + - (CACHE_POOL_NO_HIT_SET) tasks: - workunit: clients: diff --git a/qa/suites/rbd/valgrind/workloads/c_api_tests_with_journaling.yaml b/qa/suites/rbd/valgrind/workloads/c_api_tests_with_journaling.yaml index eda2b5e8a607b..f1121a4039658 100644 --- a/qa/suites/rbd/valgrind/workloads/c_api_tests_with_journaling.yaml +++ b/qa/suites/rbd/valgrind/workloads/c_api_tests_with_journaling.yaml @@ -1,3 +1,8 @@ +overrides: + ceph: + log-whitelist: + - overall HEALTH_ + - (CACHE_POOL_NO_HIT_SET) tasks: - workunit: clients: diff --git a/qa/suites/rbd/valgrind/workloads/rbd_mirror.yaml b/qa/suites/rbd/valgrind/workloads/rbd_mirror.yaml index 4a2ee40e394ca..8adc7209ad7df 100644 --- a/qa/suites/rbd/valgrind/workloads/rbd_mirror.yaml +++ b/qa/suites/rbd/valgrind/workloads/rbd_mirror.yaml @@ -1,3 +1,8 @@ +overrides: + ceph: + log-whitelist: + - overall HEALTH_ + - (CACHE_POOL_NO_HIT_SET) tasks: - workunit: clients: diff --git a/qa/suites/rgw/thrash/thrashosds-health.yaml b/qa/suites/rgw/thrash/thrashosds-health.yaml new file mode 120000 index 0000000000000..ebf7f34f39bcf --- /dev/null +++ b/qa/suites/rgw/thrash/thrashosds-health.yaml @@ -0,0 +1 @@ +../../../tasks/thrashosds-health.yaml \ No newline at end of file diff --git a/qa/tasks/ceph.py b/qa/tasks/ceph.py index 69ccbde5aa340..5318643d27a55 100644 --- a/qa/tasks/ceph.py +++ b/qa/tasks/ceph.py @@ -331,6 +331,13 @@ def create_rbd_pool(ctx, config): cluster_name = config['cluster'] first_mon = teuthology.get_first_mon(ctx, config, cluster_name) (mon_remote,) = ctx.cluster.only(first_mon).remotes.iterkeys() + log.info('Waiting for OSDs to come up') + teuthology.wait_until_osds_up( + ctx, + cluster=ctx.cluster, + remote=mon_remote, + ceph_cluster=cluster_name, + ) log.info('Creating RBD pool') mon_remote.run( args=['sudo', 'ceph', '--cluster', cluster_name, @@ -1621,3 +1628,20 @@ def task(ctx, config): finally: if config.get('wait-for-scrub', True): osd_scrub_pgs(ctx, config) + + # stop logging health to clog during shutdown, or else we generate + # a bunch of scary messages unrelated to our actual run. 
+ firstmon = teuthology.get_first_mon(ctx, config, config['cluster']) + (mon0_remote,) = ctx.cluster.only(firstmon).remotes.keys() + mon0_remote.run( + args=[ + 'sudo', + 'ceph', + '--cluster', config['cluster'], + 'tell', + 'mon.*', + 'injectargs', + '--', + '--no-mon-health-to-clog', + ] + ) diff --git a/qa/tasks/ceph_test_case.py b/qa/tasks/ceph_test_case.py index 270c18553edbb..47f3921347dbd 100644 --- a/qa/tasks/ceph_test_case.py +++ b/qa/tasks/ceph_test_case.py @@ -83,7 +83,8 @@ def wait_for_health(self, pattern, timeout): """ def seen_health_warning(): health = self.ceph_cluster.mon_manager.get_mon_health() - summary_strings = [s['summary'] for s in health['summary']] + codes = [s for s in health['checks']] + summary_strings = [s[1]['message'] for s in health['checks'].iteritems()] if len(summary_strings) == 0: log.debug("Not expected number of summary strings ({0})".format(summary_strings)) return False @@ -91,6 +92,8 @@ def seen_health_warning(): for ss in summary_strings: if pattern in ss: return True + if pattern in codes: + return True log.debug("Not found expected summary strings yet ({0})".format(summary_strings)) return False @@ -103,7 +106,7 @@ def wait_for_health_clear(self, timeout): """ def is_clear(): health = self.ceph_cluster.mon_manager.get_mon_health() - return len(health['summary']) == 0 + return len(health['checks']) == 0 self.wait_until_true(is_clear, timeout) diff --git a/qa/tasks/cephfs/test_auto_repair.py b/qa/tasks/cephfs/test_auto_repair.py index 033d8dde902c6..c0aa2e4c70fae 100644 --- a/qa/tasks/cephfs/test_auto_repair.py +++ b/qa/tasks/cephfs/test_auto_repair.py @@ -81,7 +81,7 @@ def test_mds_readonly(self): self.assertTrue(writer.finished) # The MDS should report its readonly health state to the mon - self.wait_for_health("MDS in read-only mode", timeout=30) + self.wait_for_health("MDS_READ_ONLY", timeout=30) # restart mds to make it writable self.fs.mds_fail_restart() diff --git a/qa/tasks/cephfs/test_client_limits.py b/qa/tasks/cephfs/test_client_limits.py index f25cb4a21f33b..d8675fdad8b16 100644 --- a/qa/tasks/cephfs/test_client_limits.py +++ b/qa/tasks/cephfs/test_client_limits.py @@ -62,12 +62,12 @@ def _test_client_pin(self, use_subdir): # MDS should not be happy about that, as the client is failing to comply # with the SESSION_RECALL messages it is being sent mds_recall_state_timeout = int(self.fs.get_config("mds_recall_state_timeout")) - self.wait_for_health("failing to respond to cache pressure", + self.wait_for_health("MDS_HEALTH_CLIENT_RECALL", mds_recall_state_timeout + 10) # We can also test that the MDS health warning for oversized # cache is functioning as intended. - self.wait_for_health("Too many inodes in cache", + self.wait_for_health("MDS_CACHE_OVERSIZED", mds_recall_state_timeout + 10) # When the client closes the files, it should retain only as many caps as allowed @@ -123,7 +123,7 @@ def test_client_release_bug(self): # After mds_revoke_cap_timeout, we should see a health warning (extra lag from # MDS beacon period) mds_revoke_cap_timeout = int(self.fs.get_config("mds_revoke_cap_timeout")) - self.wait_for_health("failing to respond to capability release", mds_revoke_cap_timeout + 10) + self.wait_for_health("MDS_CLIENT_RECALL", mds_revoke_cap_timeout + 10) # Client B should still be stuck self.assertFalse(rproc.finished) @@ -163,7 +163,7 @@ def test_client_oldest_tid(self): self.mount_a.create_n_files("testdir/file2", 5, True) # Wait for the health warnings. 
Assume mds can handle 10 request per second at least - self.wait_for_health("failing to advance its oldest client/flush tid", max_requests / 10) + self.wait_for_health("MDS_CLIENT_OLDEST_TID", max_requests / 10) def _test_client_cache_size(self, mount_subdir): """ diff --git a/qa/tasks/cephfs/test_failover.py b/qa/tasks/cephfs/test_failover.py index 1bb2ff7322b33..53c2d5e301e7d 100644 --- a/qa/tasks/cephfs/test_failover.py +++ b/qa/tasks/cephfs/test_failover.py @@ -112,7 +112,7 @@ def test_standby_count_wanted(self): victim = standbys.pop() self.fs.mds_stop(victim) log.info("waiting for insufficient standby daemon warning") - self.wait_for_health("insufficient standby daemons available", grace*2) + self.wait_for_health("MDS_INSUFFICIENT_STANDBY", grace*2) # restart the standby, see that he becomes a standby, check health clears self.fs.mds_restart(victim) @@ -127,7 +127,7 @@ def test_standby_count_wanted(self): self.assertGreaterEqual(len(standbys), 1) self.fs.mon_manager.raw_cluster_cmd('fs', 'set', self.fs.name, 'standby_count_wanted', str(len(standbys)+1)) log.info("waiting for insufficient standby daemon warning") - self.wait_for_health("insufficient standby daemons available", grace*2) + self.wait_for_health("MDS_INSUFFICIENT_STANDBY", grace*2) # Set it to 0 self.fs.mon_manager.raw_cluster_cmd('fs', 'set', self.fs.name, 'standby_count_wanted', '0') diff --git a/qa/tasks/mon_clock_skew_check.py b/qa/tasks/mon_clock_skew_check.py index 891e6ec484ede..547339f79a11b 100644 --- a/qa/tasks/mon_clock_skew_check.py +++ b/qa/tasks/mon_clock_skew_check.py @@ -13,43 +13,19 @@ class ClockSkewCheck: """ - Periodically check if there are any clock skews among the monitors in the - quorum. By default, assume no skews are supposed to exist; that can be - changed using the 'expect-skew' option. If 'fail-on-skew' is set to false, - then we will always succeed and only report skews if any are found. - - This class does not spawn a thread. It assumes that, if that is indeed - wanted, it should be done by a third party (for instance, the task using - this class). We intend it as such in order to reuse this class if need be. + Check if there are any clock skews among the monitors in the + quorum. This task accepts the following options: - interval amount of seconds to wait in-between checks. (default: 30.0) - max-skew maximum skew, in seconds, that is considered tolerable before - issuing a warning. (default: 0.05) + interval amount of seconds to wait before check. (default: 30.0) expect-skew 'true' or 'false', to indicate whether to expect a skew during the run or not. If 'true', the test will fail if no skew is found, and succeed if a skew is indeed found; if 'false', it's the other way around. (default: false) - never-fail Don't fail the run if a skew is detected and we weren't - expecting it, or if no skew is detected and we were expecting - it. (default: False) - - at-least-once Runs at least once, even if we are told to stop. - (default: True) - at-least-once-timeout If we were told to stop but we are attempting to - run at least once, timeout after this many seconds. - (default: 600) - - Example: - Expect a skew higher than 0.05 seconds, but only report it without - failing the teuthology run. 
- mon_clock_skew_check: - interval: 30 - max-skew: 0.05 - expect_skew: true - never-fail: true + expect-skew: true """ def __init__(self, ctx, manager, config, logger): @@ -63,181 +39,15 @@ def __init__(self, ctx, manager, config, logger): if self.config is None: self.config = dict() - self.check_interval = float(self.config.get('interval', 30.0)) - - first_mon = teuthology.get_first_mon(ctx, config) - remote = ctx.cluster.only(first_mon).remotes.keys()[0] - proc = remote.run( - args=[ - 'sudo', - 'ceph-mon', - '-i', first_mon[4:], - '--show-config-value', 'mon_clock_drift_allowed' - ], stdout=StringIO(), wait=True - ) - self.max_skew = self.config.get('max-skew', float(proc.stdout.getvalue())) - - self.expect_skew = self.config.get('expect-skew', False) - self.never_fail = self.config.get('never-fail', False) - self.at_least_once = self.config.get('at-least-once', True) - self.at_least_once_timeout = self.config.get('at-least-once-timeout', 600.0) - - def info(self, x): - """ - locally define logger for info messages - """ - self.logger.info(x) - - def warn(self, x): - """ - locally define logger for warnings - """ - self.logger.warn(x) - - def debug(self, x): - """ - locally define logger for debug messages - """ - self.logger.info(x) - self.logger.debug(x) - - def finish(self): - """ - Break out of the do_check loop. - """ - self.stopping = True - - def sleep_interval(self): - """ - If a sleep interval is set, sleep for that amount of time. - """ - if self.check_interval > 0.0: - self.debug('sleeping for {s} seconds'.format( - s=self.check_interval)) - time.sleep(self.check_interval) - - def print_skews(self, skews): - """ - Display skew values. - """ - total = len(skews) - if total > 0: - self.info('---------- found {n} skews ----------'.format(n=total)) - for mon_id, values in skews.iteritems(): - self.info('mon.{id}: {v}'.format(id=mon_id, v=values)) - self.info('-------------------------------------') - else: - self.info('---------- no skews were found ----------') - - def do_check(self): - """ - Clock skew checker. Loops until finish() is called. 
- """ - self.info('start checking for clock skews') - skews = dict() - ran_once = False - - started_on = None - - while not self.stopping or (self.at_least_once and not ran_once): - - if self.at_least_once and not ran_once and self.stopping: - if started_on is None: - self.info('kicking-off timeout (if any)') - started_on = time.time() - elif self.at_least_once_timeout > 0.0: - assert time.time() - started_on < self.at_least_once_timeout, \ - 'failed to obtain a timecheck before timeout expired' - - quorum_size = len(teuthology.get_mon_names(self.ctx)) - self.manager.wait_for_mon_quorum_size(quorum_size) - - health = self.manager.get_mon_health(True) - timechecks = health['timechecks'] - - clean_check = False - if timechecks['round_status'] == 'finished': - assert (timechecks['round'] % 2) == 0, \ - 'timecheck marked as finished but round ' \ - 'disagrees (r {r})'.format( - r=timechecks['round']) - clean_check = True - else: - assert timechecks['round_status'] == 'on-going', \ - 'timecheck status expected \'on-going\' ' \ - 'but found \'{s}\' instead'.format( - s=timechecks['round_status']) - if 'mons' in timechecks.keys() and len(timechecks['mons']) > 1: - self.info('round still on-going, but there are available reports') - else: - self.info('no timechecks available just yet') - self.sleep_interval() - continue - - assert len(timechecks['mons']) > 1, \ - 'there are not enough reported timechecks; ' \ - 'expected > 1 found {n}'.format(n=len(timechecks['mons'])) - - for check in timechecks['mons']: - mon_skew = float(check['skew']) - mon_health = check['health'] - mon_id = check['name'] - if abs(mon_skew) > self.max_skew: - assert mon_health == 'HEALTH_WARN', \ - 'mon.{id} health is \'{health}\' but skew {s} > max {ms}'.format( - id=mon_id,health=mon_health,s=abs(mon_skew),ms=self.max_skew) - - log_str = 'mon.{id} with skew {s} > max {ms}'.format( - id=mon_id,s=abs(mon_skew),ms=self.max_skew) - - """ add to skew list """ - details = check['details'] - skews[mon_id] = {'skew': mon_skew, 'details': details} - - if self.expect_skew: - self.info('expected skew: {str}'.format(str=log_str)) - else: - self.warn('unexpected skew: {str}'.format(str=log_str)) - - if clean_check or (self.expect_skew and len(skews) > 0): - ran_once = True - self.print_skews(skews) - self.sleep_interval() - - total = len(skews) - self.print_skews(skews) - - error_str = '' - found_error = False - - if self.expect_skew: - if total == 0: - error_str = 'We were expecting a skew, but none was found!' - found_error = True - else: - if total > 0: - error_str = 'We were not expecting a skew, but we did find it!' - found_error = True - - if found_error: - self.info(error_str) - if not self.never_fail: - assert False, error_str - -@contextlib.contextmanager def task(ctx, config): - """ - Use clas ClockSkewCheck to check for clock skews on the monitors. - This task will spawn a thread running ClockSkewCheck's do_check(). - - All the configuration will be directly handled by ClockSkewCheck, - so please refer to the class documentation for further information. 
- """ if config is None: config = {} assert isinstance(config, dict), \ 'mon_clock_skew_check task only accepts a dict for configuration' + interval = float(config.get('interval', 30.0)) + expect_skew = config.get('expect-skew', False) + log.info('Beginning mon_clock_skew_check...') first_mon = teuthology.get_first_mon(ctx, config) (mon,) = ctx.cluster.only(first_mon).remotes.iterkeys() @@ -247,15 +57,20 @@ def task(ctx, config): logger=log.getChild('ceph_manager'), ) - skew_check = ClockSkewCheck(ctx, - manager, config, - logger=log.getChild('mon_clock_skew_check')) - skew_check_thread = gevent.spawn(skew_check.do_check) - try: - yield - finally: - log.info('joining mon_clock_skew_check') - skew_check.finish() - skew_check_thread.get() - + quorum_size = len(teuthology.get_mon_names(ctx)) + manager.wait_for_mon_quorum_size(quorum_size) + + # wait a bit + log.info('sleeping for {s} seconds'.format( + s=interval)) + time.sleep(interval) + + health = manager.get_mon_health(True) + log.info('got health %s' % health) + if expect_skew: + if 'MON_CLOCK_SKEW' not in health['checks']: + raise RuntimeError('expected MON_CLOCK_SKEW but got none') + else: + if 'MON_CLOCK_SKEW' in health['checks']: + raise RuntimeError('got MON_CLOCK_SKEW but expected none') diff --git a/qa/tasks/thrashosds-health.yaml b/qa/tasks/thrashosds-health.yaml new file mode 100644 index 0000000000000..7113e5948aa2b --- /dev/null +++ b/qa/tasks/thrashosds-health.yaml @@ -0,0 +1,13 @@ +overrides: + ceph: + log-whitelist: + - overall HEALTH_ + - (OSDMAP_FLAGS) + - (OSD_ + - (PG_ + - (POOL_ + - (CACHE_POOL_ + - (SMALLER_PGP_NUM) + - (OBJECT_ + - (REQUEST_SLOW) + - (TOO_FEW_PGS) diff --git a/qa/workunits/cephtool/test.sh b/qa/workunits/cephtool/test.sh index e17f97efdabe6..428c404baf5c4 100755 --- a/qa/workunits/cephtool/test.sh +++ b/qa/workunits/cephtool/test.sh @@ -706,6 +706,8 @@ function test_mon_misc() ceph health --format json-pretty ceph health detail --format xml-pretty + ceph time-sync-status + ceph node ls for t in mon osd mds ; do ceph node ls $t @@ -1437,21 +1439,21 @@ function test_mon_osd() ceph osd find 0 ceph osd add-nodown 0 1 - ceph health detail | grep 'nodown osd(s).*0.*1' + ceph health detail | grep 'NODOWN' ceph osd rm-nodown 0 1 - ! ceph health detail | grep 'nodown osd(s).*0.*1' + ! ceph health detail | grep 'NODOWN' ceph osd out 0 # so we can mark it as noin later ceph osd add-noin 0 - ceph health detail | grep 'noin osd(s).*0' + ceph health detail | grep 'NOIN' ceph osd rm-noin 0 - ! ceph health detail | grep 'noin osd(s).*0' + ! ceph health detail | grep 'NOIN' ceph osd in 0 ceph osd add-noout 0 - ceph health detail | grep 'noout osd(s).*0' + ceph health detail | grep 'NOOUT' ceph osd rm-noout 0 - ! ceph health detail | grep 'noout osds(s).*0' + ! ceph health detail | grep 'NOOUT' # test osd id parse expect_false ceph osd add-noup 797er @@ -1470,12 +1472,12 @@ function test_mon_osd() ceph osd add-nodown $osd ceph osd add-noout $osd done - ceph -s | grep 'nodown osd(s)' - ceph -s | grep 'noout osd(s)' + ceph -s | grep 'NODOWN' + ceph -s | grep 'NOOUT' ceph osd rm-nodown any ceph osd rm-noout all - ! ceph -s | grep 'nodown osd(s)' - ! ceph -s | grep 'noout osd(s)' + ! ceph -s | grep 'NODOWN' + ! 
ceph -s | grep 'NOOUT' # make sure mark out preserves weight ceph osd reweight osd.0 .5 @@ -1777,29 +1779,38 @@ function test_mon_pg() # Check health status ceph osd set-nearfull-ratio .913 - ceph health | grep 'HEALTH_ERR.*Full ratio(s) out of order' - ceph health detail | grep 'backfillfull_ratio (0.912) < nearfull_ratio (0.913), increased' + ceph health -f json | grep OSD_OUT_OF_ORDER_FULL + ceph health detail | grep OSD_OUT_OF_ORDER_FULL ceph osd set-nearfull-ratio .892 ceph osd set-backfillfull-ratio .963 - ceph health detail | grep 'full_ratio (0.962) < backfillfull_ratio (0.963), increased' + ceph health -f json | grep OSD_OUT_OF_ORDER_FULL + ceph health detail | grep OSD_OUT_OF_ORDER_FULL ceph osd set-backfillfull-ratio .912 # Check injected full results $SUDO ceph --admin-daemon $(get_admin_socket osd.0) injectfull nearfull - wait_for_health "HEALTH_WARN.*1 nearfull osd(s)" + wait_for_health "OSD_NEARFULL" + ceph health detail | grep "osd.0 is near full" + $SUDO ceph --admin-daemon $(get_admin_socket osd.0) injectfull none + wait_for_health_ok + $SUDO ceph --admin-daemon $(get_admin_socket osd.1) injectfull backfillfull - wait_for_health "HEALTH_WARN.*1 backfillfull osd(s)" + wait_for_health "OSD_BACKFILLFULL" + ceph health detail | grep "osd.1 is backfill full" + $SUDO ceph --admin-daemon $(get_admin_socket osd.1) injectfull none + wait_for_health_ok + $SUDO ceph --admin-daemon $(get_admin_socket osd.2) injectfull failsafe # failsafe and full are the same as far as the monitor is concerned - wait_for_health "HEALTH_ERR.*1 full osd(s)" + wait_for_health "OSD_FULL" + ceph health detail | grep "osd.2 is full" + $SUDO ceph --admin-daemon $(get_admin_socket osd.2) injectfull none + wait_for_health_ok + $SUDO ceph --admin-daemon $(get_admin_socket osd.0) injectfull full - wait_for_health "HEALTH_ERR.*2 full osd(s)" + wait_for_health "OSD_FULL" ceph health detail | grep "osd.0 is full" - ceph health detail | grep "osd.2 is full" - ceph health detail | grep "osd.1 is backfill full" $SUDO ceph --admin-daemon $(get_admin_socket osd.0) injectfull none - $SUDO ceph --admin-daemon $(get_admin_socket osd.1) injectfull none - $SUDO ceph --admin-daemon $(get_admin_socket osd.2) injectfull none wait_for_health_ok ceph pg stat | grep 'pgs:' diff --git a/src/common/config_opts.h b/src/common/config_opts.h index d879a2c38a858..6f1156a29017f 100644 --- a/src/common/config_opts.h +++ b/src/common/config_opts.h @@ -308,8 +308,7 @@ OPTION(mon_clock_drift_allowed, OPT_FLOAT, .050) // allowed clock drift between OPTION(mon_clock_drift_warn_backoff, OPT_FLOAT, 5) // exponential backoff for clock drift warnings OPTION(mon_timecheck_interval, OPT_FLOAT, 300.0) // on leader, timecheck (clock drift check) interval (seconds) OPTION(mon_timecheck_skew_interval, OPT_FLOAT, 30.0) // on leader, timecheck (clock drift check) interval when in presence of a skew (seconds) -OPTION(mon_pg_stuck_threshold, OPT_INT, 300) // number of seconds after which pgs can be considered inactive, unclean, or stale (see doc/control.rst under dump_stuck for more info) -OPTION(mon_health_max_detail, OPT_INT, 50) // max detailed pgs to report in health detail +OPTION(mon_pg_stuck_threshold, OPT_INT, 60) // number of seconds after which pgs can be considered stuck inactive, unclean, etc (see doc/control.rst under dump_stuck for more info) OPTION(mon_pg_min_inactive, OPT_U64, 1) // the number of PGs which have to be inactive longer than 'mon_pg_stuck_threshold' before health goes into ERR. 0 means disabled, never go into ERR. 
OPTION(mon_pg_warn_min_per_osd, OPT_INT, 30) // min # pgs per (in) osd before we warn the admin OPTION(mon_pg_warn_max_per_osd, OPT_INT, 300) // max # pgs per (in) osd before we warn the admin @@ -352,6 +351,8 @@ OPTION(mon_health_data_update_interval, OPT_FLOAT, 60.0) OPTION(mon_health_to_clog, OPT_BOOL, true) OPTION(mon_health_to_clog_interval, OPT_INT, 3600) OPTION(mon_health_to_clog_tick_interval, OPT_DOUBLE, 60.0) +OPTION(mon_health_preluminous_compat, OPT_BOOL, false) +OPTION(mon_health_max_detail, OPT_INT, 50) // max detailed pgs to report in health detail OPTION(mon_data_avail_crit, OPT_INT, 5) OPTION(mon_data_avail_warn, OPT_INT, 30) OPTION(mon_data_size_warn, OPT_U64, 15*1024*1024*1024) // issue a warning when the monitor's data store goes over 15GB (in bytes) @@ -1754,6 +1755,7 @@ OPTION(mgr_service_beacon_grace, OPT_DOUBLE, 60.0) OPTION(mon_mgr_digest_period, OPT_INT, 5) // How frequently to send digests OPTION(mon_mgr_beacon_grace, OPT_INT, 30) // How long to wait to failover OPTION(mon_mgr_inactive_grace, OPT_INT, 60) // How long before health WARN -> ERR +OPTION(mon_mgr_mkfs_grace, OPT_INT, 60) // How long before we complain about MGR_DOWN OPTION(rgw_crypt_require_ssl, OPT_BOOL, true) // requests including encryption key headers must be sent over ssl OPTION(rgw_crypt_default_encryption_key, OPT_STR, "") // base64 encoded key for encryption of rgw objects OPTION(rgw_crypt_s3_kms_encryption_keys, OPT_STR, "") // extra keys that may be used for aws:kms diff --git a/src/include/health.h b/src/include/health.h new file mode 100644 index 0000000000000..b23a4d4e2b32e --- /dev/null +++ b/src/include/health.h @@ -0,0 +1,68 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#pragma once + +#include +#include + +#include "include/encoding.h" + +// health_status_t +enum health_status_t { + HEALTH_ERR = 0, + HEALTH_WARN = 1, + HEALTH_OK = 2, +}; + +static inline void encode(health_status_t hs, bufferlist& bl) { + uint8_t v = hs; + ::encode(v, bl); +} +static inline void decode(health_status_t& hs, bufferlist::iterator& p) { + uint8_t v; + ::decode(v, p); + hs = health_status_t(v); +} +template<> +struct denc_traits { + static constexpr bool supported = true; + static constexpr bool featured = false; + static constexpr bool bounded = true; + static constexpr bool need_contiguous = false; + static void bound_encode(const bufferptr& v, size_t& p, uint64_t f=0) { + p++; + } + static void encode(const health_status_t& v, + buffer::list::contiguous_appender& p, + uint64_t f=0) { + ::denc((uint8_t)v, p); + } + static void decode(health_status_t& v, buffer::ptr::iterator& p, + uint64_t f=0) { + uint8_t tmp; + ::denc(tmp, p); + v = health_status_t(tmp); + } + static void decode(health_status_t& v, buffer::list::iterator& p, + uint64_t f=0) { + uint8_t tmp; + ::denc(tmp, p); + v = health_status_t(tmp); + } +}; + +inline std::ostream& operator<<(std::ostream &oss, const health_status_t status) { + switch (status) { + case HEALTH_ERR: + oss << "HEALTH_ERR"; + break; + case HEALTH_WARN: + oss << "HEALTH_WARN"; + break; + case HEALTH_OK: + oss << "HEALTH_OK"; + break; + } + return oss; +} diff --git a/src/include/types.h b/src/include/types.h index 371f884f82e85..e904a151d75d8 100644 --- a/src/include/types.h +++ b/src/include/types.h @@ -411,29 +411,6 @@ inline ostream& operator<<(ostream& out, const ceph_mon_subscribe_item& i) << ((i.flags & CEPH_SUBSCRIBE_ONETIME) ? 
"" : "+"); } -enum health_status_t { - HEALTH_ERR = 0, - HEALTH_WARN = 1, - HEALTH_OK = 2, -}; - -#ifdef __cplusplus -inline ostream& operator<<(ostream &oss, const health_status_t status) { - switch (status) { - case HEALTH_ERR: - oss << "HEALTH_ERR"; - break; - case HEALTH_WARN: - oss << "HEALTH_WARN"; - break; - case HEALTH_OK: - oss << "HEALTH_OK"; - break; - } - return oss; -} -#endif - struct weightf_t { float v; // cppcheck-suppress noExplicitConstructor diff --git a/src/mds/FSMap.cc b/src/mds/FSMap.cc index eb08d02b7dcb0..cdce14b60892c 100644 --- a/src/mds/FSMap.cc +++ b/src/mds/FSMap.cc @@ -18,6 +18,8 @@ #include using std::stringstream; +#include "mon/health_check.h" + void Filesystem::dump(Formatter *f) const { @@ -327,6 +329,30 @@ bool FSMap::check_health(void) return changed; } +void FSMap::get_health_checks(health_check_map_t *checks) const +{ + mds_rank_t standby_count_wanted = 0; + for (const auto &i : filesystems) { + const auto &fs = i.second; + health_check_map_t fschecks; + fs->mds_map.get_health_checks(&fschecks); + checks->merge(fschecks); + standby_count_wanted = std::max( + standby_count_wanted, + fs->mds_map.get_standby_count_wanted((mds_rank_t)standby_daemons.size())); + } + + // MDS_INSUFFICIENT_STANDBY + if (standby_count_wanted) { + std::ostringstream oss, dss; + oss << "insufficient standby daemons available"; + auto& d = checks->add("MDS_INSUFFICIENT_STANDBY", HEALTH_WARN, oss.str()); + dss << "have " << standby_daemons.size() << "; want " << standby_count_wanted + << " more"; + d.detail.push_back(dss.str()); + } +} + void FSMap::encode(bufferlist& bl, uint64_t features) const { if (features & CEPH_FEATURE_SERVER_JEWEL) { diff --git a/src/mds/FSMap.h b/src/mds/FSMap.h index 3d389c48885b5..ea102a712740c 100644 --- a/src/mds/FSMap.h +++ b/src/mds/FSMap.h @@ -35,6 +35,7 @@ #include "mds/mdstypes.h" class CephContext; +class health_check_map_t; #define MDS_FEATURE_INCOMPAT_BASE CompatSet::Feature(1, "base v0.20") #define MDS_FEATURE_INCOMPAT_CLIENTRANGES CompatSet::Feature(2, "client writeable ranges") @@ -476,6 +477,8 @@ class FSMap { void get_health(list >& summary, list > *detail) const; + void get_health_checks(health_check_map_t *checks) const; + bool check_health(void); /** diff --git a/src/mds/MDSMap.cc b/src/mds/MDSMap.cc index b397eb089e9fa..bd54469756f42 100644 --- a/src/mds/MDSMap.cc +++ b/src/mds/MDSMap.cc @@ -18,6 +18,8 @@ #include using std::stringstream; +#include "mon/health_check.h" + // features CompatSet get_mdsmap_compat_set_all() { @@ -404,6 +406,78 @@ void MDSMap::get_health(list >& summary, } } +void MDSMap::get_health_checks(health_check_map_t *checks) const +{ + // FS_WITH_FAILED_MDS + // MDS_FAILED + if (!failed.empty()) { + health_check_t& fscheck = checks->add( + "FS_WITH_FAILED_MDS", HEALTH_WARN, + "%num% filesystem%plurals% %isorare% have a failed mds daemon"); + ostringstream ss; + ss << "fs " << fs_name << " has " << failed.size() << " failed mds" + << (failed.size() > 1 ? "s" : ""); + fscheck.detail.push_back(ss.str()); + + health_check_t& check = checks->add("MDS_FAILED", HEALTH_ERR, + "%num% mds daemon%plurals% down"); + for (auto p : failed) { + std::ostringstream oss; + oss << "fs " << fs_name << " mds." << p << " has failed"; + check.detail.push_back(oss.str()); + } + } + + // MDS_DAMAGED + if (!damaged.empty()) { + health_check_t& check = checks->add("MDS_DAMAGED", HEALTH_ERR, + "%num% mds daemon%plurals% damaged"); + for (auto p : damaged) { + std::ostringstream oss; + oss << "fs " << fs_name << " mds." 
<< p << " is damaged"; + check.detail.push_back(oss.str()); + } + } + + // FS_DEGRADED + // MDS_DEGRADED + if (is_degraded()) { + health_check_t& fscheck = checks->add( + "FS_DEGRADED", HEALTH_WARN, + "%num% filesystem%plurals% %isorare% degraded"); + ostringstream ss; + ss << "fs " << fs_name << " is degraded"; + fscheck.detail.push_back(ss.str()); + + list detail; + for (mds_rank_t i = mds_rank_t(0); i< get_max_mds(); i++) { + if (!is_up(i)) + continue; + mds_gid_t gid = up.find(i)->second; + map::const_iterator info = mds_info.find(gid); + stringstream ss; + ss << "fs " << fs_name << " mds." << info->second.name << " at " + << info->second.addr << " rank " << i; + if (is_resolve(i)) + ss << " is resolving"; + if (is_replay(i)) + ss << " is replaying journal"; + if (is_rejoin(i)) + ss << " is rejoining"; + if (is_reconnect(i)) + ss << " is reconnecting to clients"; + if (ss.str().length()) + detail.push_back(ss.str()); + } + if (!detail.empty()) { + health_check_t& check = checks->add( + "MDS_DEGRADED", HEALTH_WARN, + "%num% mds daemon%plurals% %isorare% degraded"); + check.detail.insert(check.detail.end(), detail.begin(), detail.end()); + } + } +} + void MDSMap::mds_info_t::encode_versioned(bufferlist& bl, uint64_t features) const { ENCODE_START(7, 4, bl); diff --git a/src/mds/MDSMap.h b/src/mds/MDSMap.h index e99be2be67b57..e6423c9bea1fa 100644 --- a/src/mds/MDSMap.h +++ b/src/mds/MDSMap.h @@ -21,6 +21,7 @@ #include "include/types.h" #include "common/Clock.h" #include "msg/Message.h" +#include "include/health.h" #include #include @@ -58,6 +59,7 @@ */ class CephContext; +class health_check_map_t; extern CompatSet get_mdsmap_compat_set_all(); extern CompatSet get_mdsmap_compat_set_default(); @@ -461,6 +463,8 @@ class MDSMap { void get_health(list >& summary, list > *detail) const; + void get_health_checks(health_check_map_t *checks) const; + typedef enum { AVAILABLE = 0, diff --git a/src/messages/MMDSBeacon.h b/src/messages/MMDSBeacon.h index 31febe50a353c..a83502e85c444 100644 --- a/src/messages/MMDSBeacon.h +++ b/src/messages/MMDSBeacon.h @@ -43,6 +43,56 @@ enum mds_metric_t { MDS_HEALTH_CACHE_OVERSIZED }; +static inline const char *mds_metric_name(mds_metric_t m) +{ + switch (m) { + case MDS_HEALTH_TRIM: return "MDS_TRIM"; + case MDS_HEALTH_CLIENT_RECALL: return "MDS_CLIENT_RECALL"; + case MDS_HEALTH_CLIENT_LATE_RELEASE: return "MDS_CLIENT_LATE_RELEASE"; + case MDS_HEALTH_CLIENT_RECALL_MANY: return "MDS_CLIENT_RECALL_MANY"; + case MDS_HEALTH_CLIENT_LATE_RELEASE_MANY: return "MDS_CLIENT_LATE_RELEASE_MANY"; + case MDS_HEALTH_CLIENT_OLDEST_TID: return "MDS_CLIENT_OLDEST_TID"; + case MDS_HEALTH_CLIENT_OLDEST_TID_MANY: return "MDS_CLIENT_OLDEST_TID_MANY"; + case MDS_HEALTH_DAMAGE: return "MDS_DAMAGE"; + case MDS_HEALTH_READ_ONLY: return "MDS_READ_ONLY"; + case MDS_HEALTH_SLOW_REQUEST: return "MDS_SLOW_REQUEST"; + case MDS_HEALTH_CACHE_OVERSIZED: return "MDS_CACHE_OVERSIZED"; + default: + return "???"; + } +} + +static inline const char *mds_metric_summary(mds_metric_t m) +{ + switch (m) { + case MDS_HEALTH_TRIM: + return "%num% MDSs behind on trimming"; + case MDS_HEALTH_CLIENT_RECALL: + return "%num% clients failing to respond to cache pressure"; + case MDS_HEALTH_CLIENT_LATE_RELEASE: + return "%num% clients failing to respond to capability release"; + case MDS_HEALTH_CLIENT_RECALL_MANY: + return "%num% MDSs have many clients failing to respond to cache pressure"; + case MDS_HEALTH_CLIENT_LATE_RELEASE_MANY: + return "%num% MDSs have many clients failing to respond to capability " + 
"release"; + case MDS_HEALTH_CLIENT_OLDEST_TID: + return "%num% clients failing to advance oldest client/flush tid"; + case MDS_HEALTH_CLIENT_OLDEST_TID_MANY: + return "%num% MDSs have clients failing to advance oldest client/flush tid"; + case MDS_HEALTH_DAMAGE: + return "%num% MDSs report damaged metadata"; + case MDS_HEALTH_READ_ONLY: + return "%num% MDSs are read only"; + case MDS_HEALTH_SLOW_REQUEST: + return "%num% MDSs report slow requests"; + case MDS_HEALTH_CACHE_OVERSIZED: + return "%num% MDSs report oversized cache"; + default: + return "???"; + } +} + /** * This structure is designed to allow some flexibility in how we emit health * complaints, such that: diff --git a/src/messages/MMonHealthChecks.h b/src/messages/MMonHealthChecks.h new file mode 100644 index 0000000000000..6b66847633f9d --- /dev/null +++ b/src/messages/MMonHealthChecks.h @@ -0,0 +1,47 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef CEPH_MMON_HEALTH_CHECKS_H +#define CEPH_MMON_HEALTH_CHECKS_H + +#include "messages/PaxosServiceMessage.h" +#include "mon/health_check.h" + +struct MMonHealthChecks : public PaxosServiceMessage +{ + static const int HEAD_VERSION = 1; + static const int COMPAT_VERSION = 1; + + health_check_map_t health_checks; + + MMonHealthChecks() + : PaxosServiceMessage(MSG_MON_HEALTH_CHECKS, HEAD_VERSION, COMPAT_VERSION) { + } + MMonHealthChecks(health_check_map_t& m) + : PaxosServiceMessage(MSG_MON_HEALTH_CHECKS, HEAD_VERSION, COMPAT_VERSION), + health_checks(m) { + } + +private: + ~MMonHealthChecks() override { } + +public: + const char *get_type_name() const override { return "mon_health_checks"; } + void print(ostream &o) const override { + o << "mon_health_checks(" << health_checks.checks.size() << " checks)"; + } + + void decode_payload() override { + bufferlist::iterator p = payload.begin(); + paxos_decode(p); + ::decode(health_checks, p); + } + + void encode_payload(uint64_t features) override { + paxos_encode(); + ::encode(health_checks, payload); + } + +}; + +#endif diff --git a/src/messages/MMonMgrReport.h b/src/messages/MMonMgrReport.h index 8f3a8fe911540..eef0966f15c0e 100644 --- a/src/messages/MMonMgrReport.h +++ b/src/messages/MMonMgrReport.h @@ -17,17 +17,8 @@ #include "messages/PaxosServiceMessage.h" #include "include/types.h" - -// health_status_t -static inline void encode(health_status_t hs, bufferlist& bl) { - uint8_t v = hs; - ::encode(v, bl); -} -static inline void decode(health_status_t& hs, bufferlist::iterator& p) { - uint8_t v; - ::decode(v, p); - hs = health_status_t(v); -} +#include "include/health.h" +#include "mon/health_check.h" class MMonMgrReport : public PaxosServiceMessage { @@ -36,7 +27,7 @@ class MMonMgrReport : public PaxosServiceMessage { public: // PGMapDigest is in data payload - list> health_summary, health_detail; + health_check_map_t health_checks; bufferlist service_map_bl; // encoded ServiceMap MMonMgrReport() @@ -49,20 +40,18 @@ class MMonMgrReport : public PaxosServiceMessage { const char *get_type_name() const override { return "monmgrreport"; } void print(ostream& out) const override { - out << get_type_name(); + out << get_type_name() << "(" << health_checks.checks.size() << " checks)"; } void encode_payload(uint64_t features) override { paxos_encode(); - ::encode(health_summary, payload); - ::encode(health_detail, payload); + ::encode(health_checks, payload); ::encode(service_map_bl, payload); } void decode_payload() override { bufferlist::iterator p = payload.begin(); 
paxos_decode(p); - ::decode(health_summary, p); - ::decode(health_detail, p); + ::decode(health_checks, p); ::decode(service_map_bl, p); } }; diff --git a/src/mgr/DaemonServer.cc b/src/mgr/DaemonServer.cc index 7b9086c4800dd..6454c8da306a5 100644 --- a/src/mgr/DaemonServer.cc +++ b/src/mgr/DaemonServer.cc @@ -113,6 +113,8 @@ int DaemonServer::init(uint64_t gid, entity_addr_t client_addr) msgr->start(); msgr->add_dispatcher_tail(this); + started_at = ceph_clock_now(); + return 0; } @@ -235,6 +237,7 @@ bool DaemonServer::ms_dispatch(Message *m) switch (m->get_type()) { case MSG_PGSTATS: cluster_state.ingest_pgstats(static_cast(m)); + maybe_ready(m->get_source().num()); m->put(); return true; case MSG_MGR_REPORT: @@ -249,6 +252,35 @@ bool DaemonServer::ms_dispatch(Message *m) }; } +void DaemonServer::maybe_ready(int32_t osd_id) +{ + if (!pgmap_ready && reported_osds.find(osd_id) == reported_osds.end()) { + dout(4) << "initial report from osd " << osd_id << dendl; + reported_osds.insert(osd_id); + std::set up_osds; + + cluster_state.with_osdmap([&](const OSDMap& osdmap) { + osdmap.get_up_osds(up_osds); + }); + + std::set unreported_osds; + std::set_difference(up_osds.begin(), up_osds.end(), + reported_osds.begin(), reported_osds.end(), + std::inserter(unreported_osds, unreported_osds.begin())); + + if (unreported_osds.size() == 0) { + dout(4) << "all osds have reported, sending PG state to mon" << dendl; + pgmap_ready = true; + reported_osds.clear(); + // Avoid waiting for next tick + send_report(); + } else { + dout(4) << "still waiting for " << unreported_osds.size() << " osds" + " to report in before PGMap is ready" << dendl; + } + } +} + void DaemonServer::shutdown() { dout(10) << "begin" << dendl; @@ -977,6 +1009,19 @@ void DaemonServer::_prune_pending_service_map() void DaemonServer::send_report() { + if (!pgmap_ready) { + if (ceph_clock_now() - started_at > g_conf->mgr_stats_period * 4.0) { + pgmap_ready = true; + reported_osds.clear(); + dout(1) << "Giving up on OSDs that haven't reported yet, sending " + << "potentially incomplete PG state to mon" << dendl; + } else { + dout(1) << "Not sending PG status to monitor yet, waiting for OSDs" + << dendl; + return; + } + } + auto m = new MMonMgrReport(); cluster_state.with_pgmap([&](const PGMap& pg_map) { cluster_state.update_delta_stats(); @@ -992,15 +1037,21 @@ void DaemonServer::send_report() } } - // FIXME: reporting health detail here might be a bad idea? cluster_state.with_osdmap([&](const OSDMap& osdmap) { // FIXME: no easy way to get mon features here. this will do for // now, though, as long as we don't make a backward-incompat change. pg_map.encode_digest(osdmap, m->get_data(), CEPH_FEATURES_ALL); dout(10) << pg_map << dendl; - pg_map.get_health(g_ceph_context, osdmap, - m->health_summary, - &m->health_detail); + + pg_map.get_health_checks(g_ceph_context, osdmap, + &m->health_checks); + dout(10) << m->health_checks.checks.size() << " health checks" + << dendl; + dout(20) << "health checks:\n"; + JSONFormatter jf(true); + jf.dump_object("health_checks", m->health_checks); + jf.flush(*_dout); + *_dout << dendl; }); }); // TODO? 
We currently do not notify the PyModules diff --git a/src/mgr/DaemonServer.h b/src/mgr/DaemonServer.h index 06ee68b8adc80..4877cfe85aed8 100644 --- a/src/mgr/DaemonServer.h +++ b/src/mgr/DaemonServer.h @@ -89,6 +89,11 @@ class DaemonServer : public Dispatcher void _prune_pending_service_map(); + utime_t started_at; + bool pgmap_ready = false; + std::set reported_osds; + void maybe_ready(int32_t osd_id); + public: int init(uint64_t gid, entity_addr_t client_addr); void shutdown(); diff --git a/src/mon/CMakeLists.txt b/src/mon/CMakeLists.txt index 9e40ef58863ca..556157132f805 100644 --- a/src/mon/CMakeLists.txt +++ b/src/mon/CMakeLists.txt @@ -16,6 +16,7 @@ set(lib_mon_srcs AuthMonitor.cc Elector.cc HealthMonitor.cc + OldHealthMonitor.cc DataHealthService.cc PGMonitor.cc PGMap.cc diff --git a/src/mon/ConfigKeyService.h b/src/mon/ConfigKeyService.h index 9977968736593..7dfb140c7e7c3 100644 --- a/src/mon/ConfigKeyService.h +++ b/src/mon/ConfigKeyService.h @@ -57,8 +57,7 @@ class ConfigKeyService : public QuorumService * @{ */ void init() override { } - void get_health(Formatter *f, - list >& summary, + void get_health(list >& summary, list > *detail) override { } bool service_dispatch(MonOpRequestRef op) override; diff --git a/src/mon/DataHealthService.cc b/src/mon/DataHealthService.cc index 6305263a80920..4a5b42ab38889 100644 --- a/src/mon/DataHealthService.cc +++ b/src/mon/DataHealthService.cc @@ -65,16 +65,10 @@ void DataHealthService::start_epoch() } void DataHealthService::get_health( - Formatter *f, list >& summary, list > *detail) { dout(10) << __func__ << dendl; - if (f) { - f->open_object_section("data_health"); - f->open_array_section("mons"); - } - for (map::iterator it = stats.begin(); it != stats.end(); ++it) { string mon_name = mon->monmap->get_name(it->first.addr); @@ -110,22 +104,6 @@ void DataHealthService::get_health( if (detail) detail->push_back(make_pair(health_status, ss.str())); } - - if (f) { - f->open_object_section("mon"); - f->dump_string("name", mon_name.c_str()); - // leave this unenclosed by an object section to avoid breaking backward-compatibility - stats.dump(f); - f->dump_stream("health") << health_status; - if (health_status != HEALTH_OK) - f->dump_string("health_detail", health_detail); - f->close_section(); - } - } - - if (f) { - f->close_section(); // mons - f->close_section(); // data_health } } diff --git a/src/mon/DataHealthService.h b/src/mon/DataHealthService.h index 8834b600b8d9c..91caf4e32115c 100644 --- a/src/mon/DataHealthService.h +++ b/src/mon/DataHealthService.h @@ -65,9 +65,9 @@ class DataHealthService : start_tick(); } - void get_health(Formatter *f, - list >& summary, - list > *detail) override; + void get_health( + list >& summary, + list > *detail) override; int get_type() override { return HealthService::SERVICE_HEALTH_DATA; diff --git a/src/mon/HealthMonitor.cc b/src/mon/HealthMonitor.cc index 0887bdc1b759f..32f62667e25f1 100644 --- a/src/mon/HealthMonitor.cc +++ b/src/mon/HealthMonitor.cc @@ -12,13 +12,13 @@ * */ -#include #include #include +#include +#include -// #include -// Because intusive_ptr clobbers our assert... 
#include "include/assert.h" +#include "include/stringify.h" #include "mon/Monitor.h" #include "mon/HealthService.h" @@ -26,8 +26,9 @@ #include "mon/DataHealthService.h" #include "messages/MMonHealth.h" +#include "messages/MMonHealthChecks.h" + #include "common/Formatter.h" -// #include "common/config.h" #define dout_subsys ceph_subsys_mon #undef dout_prefix @@ -35,84 +36,345 @@ static ostream& _prefix(std::ostream *_dout, const Monitor *mon, const HealthMonitor *hmon) { return *_dout << "mon." << mon->name << "@" << mon->rank - << "(" << mon->get_state_name() << ")." << hmon->get_name() - << "(" << hmon->get_epoch() << ") "; + << "(" << mon->get_state_name() << ").health "; +} + +HealthMonitor::HealthMonitor(Monitor *m, Paxos *p, const string& service_name) + : PaxosService(m, p, service_name) { } void HealthMonitor::init() { dout(10) << __func__ << dendl; - assert(services.empty()); - services[HealthService::SERVICE_HEALTH_DATA] = new DataHealthService(mon); +} + +void HealthMonitor::create_initial() +{ + dout(10) << __func__ << dendl; +} + +void HealthMonitor::update_from_paxos(bool *need_bootstrap) +{ + version = get_last_committed(); + dout(10) << __func__ << dendl; + load_health(); + + bufferlist qbl; + mon->store->get(service_name, "quorum", qbl); + if (qbl.length()) { + auto p = qbl.begin(); + ::decode(quorum_checks, p); + } else { + quorum_checks.clear(); + } + + bufferlist lbl; + mon->store->get(service_name, "leader", lbl); + if (lbl.length()) { + auto p = lbl.begin(); + ::decode(leader_checks, p); + } else { + leader_checks.clear(); + } - for (map::iterator it = services.begin(); - it != services.end(); - ++it) { - it->second->init(); + dout(20) << "dump:"; + JSONFormatter jf(true); + jf.open_object_section("health"); + jf.open_object_section("quorum_health"); + for (auto& p : quorum_checks) { + string s = string("mon.") + stringify(p.first); + jf.dump_object(s.c_str(), p.second); } + jf.close_section(); + jf.dump_object("leader_health", leader_checks); + jf.close_section(); + jf.flush(*_dout); + *_dout << dendl; } -bool HealthMonitor::service_dispatch(MonOpRequestRef op) +void HealthMonitor::create_pending() { - assert(op->get_req()->get_type() == MSG_MON_HEALTH); - MMonHealth *hm = static_cast(op->get_req()); - int service_type = hm->get_service_type(); - if (services.count(service_type) == 0) { - dout(1) << __func__ << " service type " << service_type - << " not registered -- drop message!" << dendl; - return false; - } - return services[service_type]->service_dispatch(op); + dout(10) << " " << version << dendl; } -void HealthMonitor::start_epoch() { - epoch_t epoch = get_epoch(); - for (map::iterator it = services.begin(); - it != services.end(); ++it) { - it->second->start(epoch); +void HealthMonitor::encode_pending(MonitorDBStore::TransactionRef t) +{ + ++version; + dout(10) << " " << version << dendl; + put_last_committed(t, version); + + bufferlist qbl; + ::encode(quorum_checks, qbl); + t->put(service_name, "quorum", qbl); + bufferlist lbl; + ::encode(leader_checks, lbl); + t->put(service_name, "leader", lbl); + + health_check_map_t pending_health; + + // combine per-mon details carefully... 
+ map> names; // code -> + for (auto p : quorum_checks) { + for (auto q : p.second.checks) { + names[q.first].insert(mon->monmap->get_name(p.first)); + } + pending_health.merge(p.second); } + for (auto p : pending_health.checks) { + p.second.summary = boost::regex_replace( + p.second.summary, + boost::regex("%num%"), stringify(names[p.first].size())); + p.second.summary = boost::regex_replace( + p.second.summary, + boost::regex("%names%"), stringify(names[p.first])); + p.second.summary = boost::regex_replace( + p.second.summary, + boost::regex("%plurals%"), + names[p.first].size() > 1 ? "s" : ""); + p.second.summary = boost::regex_replace( + p.second.summary, + boost::regex("%isorare%"), + names[p.first].size() > 1 ? "are" : "is"); + } + + pending_health.merge(leader_checks); + encode_health(pending_health, t); } -void HealthMonitor::finish_epoch() { - generic_dout(20) << "HealthMonitor::finish_epoch()" << dendl; - for (map::iterator it = services.begin(); - it != services.end(); ++it) { - assert(it->second != NULL); - it->second->finish(); +version_t HealthMonitor::get_trim_to() +{ + // we don't actually need *any* old states, but keep a few. + if (version > 5) { + return version - 5; } + return 0; } -void HealthMonitor::service_shutdown() +bool HealthMonitor::preprocess_query(MonOpRequestRef op) { - dout(0) << "HealthMonitor::service_shutdown " - << services.size() << " services" << dendl; - for (map::iterator it = services.begin(); - it != services.end(); - ++it) { - it->second->shutdown(); - delete it->second; - } - services.clear(); + switch (op->get_req()->get_type()) { + case MSG_MON_HEALTH: + { + MMonHealth *hm = static_cast(op->get_req()); + int service_type = hm->get_service_type(); + if (services.count(service_type) == 0) { + dout(1) << __func__ << " service type " << service_type + << " not registered -- drop message!" 
<< dendl; + return false; + } + return services[service_type]->service_dispatch(op); + } + + case MSG_MON_HEALTH_CHECKS: + return preprocess_health_checks(op); + } + return false; } -void HealthMonitor::get_health(Formatter *f, - list >& summary, - list > *detail) +bool HealthMonitor::prepare_update(MonOpRequestRef op) { - if (f) { - f->open_object_section("health"); - f->open_array_section("health_services"); + return false; +} + +bool HealthMonitor::preprocess_health_checks(MonOpRequestRef op) +{ + MMonHealthChecks *m = static_cast(op->get_req()); + quorum_checks[m->get_source().num()] = m->health_checks; + return true; +} + +void HealthMonitor::tick() +{ + if (!is_active()) { + return; + } + dout(10) << __func__ << dendl; + bool changed = false; + if (check_member_health()) { + changed = true; + } + if (mon->is_leader()) { + if (check_leader_health()) { + changed = true; + } } + if (changed) { + propose_pending(); + } +} + +bool HealthMonitor::check_member_health() +{ + dout(20) << __func__ << dendl; + bool changed = false; - for (map::iterator it = services.begin(); - it != services.end(); - ++it) { - it->second->get_health(f, summary, detail); + // snapshot of usage + DataStats stats; + get_fs_stats(stats.fs_stats, g_conf->mon_data.c_str()); + map extra; + uint64_t store_size = mon->store->get_estimated_size(extra); + assert(store_size > 0); + stats.store_stats.bytes_total = store_size; + stats.store_stats.bytes_sst = extra["sst"]; + stats.store_stats.bytes_log = extra["log"]; + stats.store_stats.bytes_misc = extra["misc"]; + stats.last_update = ceph_clock_now(); + dout(10) << __func__ << " avail " << stats.fs_stats.avail_percent << "%" + << " total " << prettybyte_t(stats.fs_stats.byte_total) + << ", used " << prettybyte_t(stats.fs_stats.byte_used) + << ", avail " << prettybyte_t(stats.fs_stats.byte_avail) << dendl; + + // MON_DISK_{LOW,CRIT,BIG} + health_check_map_t next; + if (stats.fs_stats.avail_percent <= g_conf->mon_data_avail_crit) { + stringstream ss, ss2; + ss << "mon%plurals% %names% %isorare% very low on available space"; + auto& d = next.add("MON_DISK_CRIT", HEALTH_ERR, ss.str()); + ss2 << "mon." << mon->name << " has " << stats.fs_stats.avail_percent + << "% avail"; + d.detail.push_back(ss2.str()); + } else if (stats.fs_stats.avail_percent <= g_conf->mon_data_avail_warn) { + stringstream ss, ss2; + ss << "mon%plurals% %names% %isorare% low on available space"; + auto& d = next.add("MON_DISK_LOW", HEALTH_ERR, ss.str()); + ss2 << "mon." << mon->name << " has " << stats.fs_stats.avail_percent + << "% avail"; + d.detail.push_back(ss2.str()); + } + if (stats.store_stats.bytes_total >= g_conf->mon_data_size_warn) { + stringstream ss, ss2; + ss << "mon%plurals% %names% %isorare% using a lot of disk space"; + auto& d = next.add("MON_DISK_BIG", HEALTH_WARN, ss.str()); + ss2 << "mon." 
<< mon->name << " is " + << prettybyte_t(stats.store_stats.bytes_total) + << " >= mon_data_size_warn (" + << prettybyte_t(g_conf->mon_data_size_warn) << ")"; + d.detail.push_back(ss2.str()); } - if (f) { - f->close_section(); // health_services - f->close_section(); // health + auto p = quorum_checks.find(mon->rank); + if (p == quorum_checks.end() || + p->second != next) { + if (mon->is_leader()) { + // prepare to propose + quorum_checks[mon->rank] = next; + changed = true; + } else { + // tell the leader + mon->messenger->send_message(new MMonHealthChecks(next), + mon->monmap->get_inst(mon->get_leader())); + } } + + // OSD_NO_DOWN_OUT_INTERVAL + { + // Warn if 'mon_osd_down_out_interval' is set to zero. + // Having this option set to zero on the leader acts much like the + // 'noout' flag. It's hard to figure out what's going wrong with clusters + // without the 'noout' flag set but acting like that just the same, so + // we report a HEALTH_WARN in case this option is set to zero. + // This is an ugly hack to get the warning out, but until we find a way + // to spread global options throughout the mon cluster and have all mons + // using a base set of the same options, we need to work around this sort + // of things. + // There's also the obvious drawback that if this is set on a single + // monitor on a 3-monitor cluster, this warning will only be shown every + // third monitor connection. + if (g_conf->mon_warn_on_osd_down_out_interval_zero && + g_conf->mon_osd_down_out_interval == 0) { + ostringstream ss, ds; + ss << "mon%plurals% %names %hasorhave% mon_osd_down_out_interval set to 0"; + auto& d = next.add("OSD_NO_DOWN_OUT_INTERVAL", HEALTH_WARN, ss.str()); + ds << "mon." << mon->name << " has mon_osd_down_out_interval set to 0"; + d.detail.push_back(ds.str()); + } + } + + return changed; } +bool HealthMonitor::check_leader_health() +{ + dout(20) << __func__ << dendl; + bool changed = false; + + // prune quorum_health + { + auto& qset = mon->get_quorum(); + auto p = quorum_checks.begin(); + while (p != quorum_checks.end()) { + if (qset.count(p->first) == 0) { + p = quorum_checks.erase(p); + changed = true; + } else { + ++p; + } + } + } + + health_check_map_t next; + + // MON_DOWN + { + int max = mon->monmap->size(); + int actual = mon->get_quorum().size(); + if (actual < max) { + ostringstream ss; + ss << (max-actual) << "/" << max << " mons down, quorum " + << mon->get_quorum_names(); + auto& d = next.add("MON_DOWN", HEALTH_WARN, ss.str()); + set q = mon->get_quorum(); + for (int i=0; imonmap->get_name(i) << " (rank " << i + << ") addr " << mon->monmap->get_addr(i) + << " is down (out of quorum)"; + d.detail.push_back(ss.str()); + } + } + } + } + + // MON_CLOCK_SKEW + if (!mon->timecheck_skews.empty()) { + list warns; + list details; + for (map::iterator i = mon->timecheck_skews.begin(); + i != mon->timecheck_skews.end(); ++i) { + entity_inst_t inst = i->first; + double skew = i->second; + double latency = mon->timecheck_latencies[inst]; + string name = mon->monmap->get_name(inst.addr); + ostringstream tcss; + health_status_t tcstatus = mon->timecheck_status(tcss, skew, latency); + if (tcstatus != HEALTH_OK) { + warns.push_back(name); + ostringstream tmp_ss; + tmp_ss << "mon." << name + << " addr " << inst.addr << " " << tcss.str() + << " (latency " << latency << "s)"; + details.push_back(tmp_ss.str()); + } + } + if (!warns.empty()) { + ostringstream ss; + ss << "clock skew detected on"; + while (!warns.empty()) { + ss << " mon." 
<< warns.front(); + warns.pop_front(); + if (!warns.empty()) + ss << ","; + } + auto& d = next.add("MON_CLOCK_SKEW", HEALTH_WARN, + "monitor clock skew detected"); + d.detail.swap(details); + } + } + + if (next != leader_checks) { + changed = true; + leader_checks = next; + } + return changed; +} diff --git a/src/mon/HealthMonitor.h b/src/mon/HealthMonitor.h index 9d05c64e990a5..5387ce0340a89 100644 --- a/src/mon/HealthMonitor.h +++ b/src/mon/HealthMonitor.h @@ -14,50 +14,54 @@ #ifndef CEPH_HEALTH_MONITOR_H #define CEPH_HEALTH_MONITOR_H -#include "mon/QuorumService.h" +#include "mon/PaxosService.h" //forward declaration namespace ceph { class Formatter; } class HealthService; -class HealthMonitor : public QuorumService +class HealthMonitor : public PaxosService { map services; - -protected: - void service_shutdown() override; + version_t version = 0; + map quorum_checks; // for each quorum member + health_check_map_t leader_checks; // leader only public: - HealthMonitor(Monitor *m) : QuorumService(m) { } + HealthMonitor(Monitor *m, Paxos *p, const string& service_name); ~HealthMonitor() override { assert(services.empty()); } - /** * @defgroup HealthMonitor_Inherited_h Inherited abstract methods * @{ */ void init() override; - void get_health(Formatter *f, - list >& summary, - list > *detail) override; - bool service_dispatch(MonOpRequestRef op) override; - void start_epoch() override; + void get_health( + list >& summary, + list > *detail, + CephContext *cct) const override {} - void finish_epoch() override; + bool preprocess_query(MonOpRequestRef op) override; + bool prepare_update(MonOpRequestRef op) override; - void cleanup() override { } - void service_tick() override { } + bool preprocess_health_checks(MonOpRequestRef op); + bool prepare_health_checks(MonOpRequestRef op); - int get_type() override { - return QuorumService::SERVICE_HEALTH; - } + bool check_leader_health(); + bool check_member_health(); - string get_name() const override { - return "health"; - } + void create_initial() override; + void update_from_paxos(bool *need_bootstrap) override; + void create_pending() override; + void encode_pending(MonitorDBStore::TransactionRef t) override; + version_t get_trim_to() override; + + void encode_full(MonitorDBStore::TransactionRef t) override { } + + void tick() override; /** * @} // HealthMonitor_Inherited_h diff --git a/src/mon/MDSMonitor.cc b/src/mon/MDSMonitor.cc index 608e1aeedc3e8..c38e681a0312c 100644 --- a/src/mon/MDSMonitor.cc +++ b/src/mon/MDSMonitor.cc @@ -14,6 +14,7 @@ #include #include +#include #include "MDSMonitor.h" #include "FSCommands.h" @@ -99,6 +100,8 @@ void MDSMonitor::update_from_paxos(bool *need_bootstrap) << ", my e " << fsmap.epoch << dendl; assert(version > fsmap.epoch); + load_health(); + // read and decode bufferlist fsmap_bl; fsmap_bl.clear(); @@ -174,6 +177,65 @@ void MDSMonitor::encode_pending(MonitorDBStore::TransactionRef t) } pending_daemon_health_rm.clear(); remove_from_metadata(t); + + // health + health_check_map_t new_checks; + const auto info_map = pending_fsmap.get_mds_info(); + for (const auto &i : info_map) { + const auto &gid = i.first; + const auto &info = i.second; + if (pending_daemon_health_rm.count(gid)) { + continue; + } + MDSHealth health; + auto p = pending_daemon_health.find(gid); + if (p != pending_daemon_health.end()) { + health = p->second; + } else { + bufferlist bl; + mon->store->get(MDS_HEALTH_PREFIX, stringify(gid), bl); + if (!bl.length()) { + derr << "Missing health data for MDS " << gid << dendl; + continue; + } + 
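      // no pending in-memory update for this gid, so decode the health blob
      // most recently persisted for it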
bufferlist::iterator bl_i = bl.begin(); + health.decode(bl_i); + } + for (const auto &metric : health.metrics) { + int const rank = info.rank; + health_check_t *check = &new_checks.add( + mds_metric_name(metric.type), + metric.sev, + mds_metric_summary(metric.type)); + ostringstream ss; + ss << "mds" << info.name << "(mds." << rank << "): " << metric.message; + for (auto p = metric.metadata.begin(); + p != metric.metadata.end(); + ++p) { + if (p != metric.metadata.begin()) { + ss << ", "; + } + ss << p->first << ": " << p->second; + } + check->detail.push_back(ss.str()); + } + } + pending_fsmap.get_health_checks(&new_checks); + for (auto& p : new_checks.checks) { + p.second.summary = boost::regex_replace( + p.second.summary, + boost::regex("%num%"), + stringify(p.second.detail.size())); + p.second.summary = boost::regex_replace( + p.second.summary, + boost::regex("%plurals%"), + p.second.detail.size() > 1 ? "s" : ""); + p.second.summary = boost::regex_replace( + p.second.summary, + boost::regex("%isorare%"), + p.second.detail.size() > 1 ? "are" : "is"); + } + encode_health(new_checks, t); } version_t MDSMonitor::get_trim_to() @@ -741,8 +803,9 @@ void MDSMonitor::on_active() tick(); update_logger(); - if (mon->is_leader()) - mon->clog->info() << "fsmap " << fsmap; + if (mon->is_leader()) { + mon->clog->debug() << "fsmap " << fsmap; + } } void MDSMonitor::get_health(list >& summary, diff --git a/src/mon/MgrMonitor.cc b/src/mon/MgrMonitor.cc index 234453c7a7e2d..e1688a39681ef 100644 --- a/src/mon/MgrMonitor.cc +++ b/src/mon/MgrMonitor.cc @@ -60,6 +60,10 @@ void MgrMonitor::update_from_paxos(bool *need_bootstrap) dout(4) << "active server: " << map.active_addr << "(" << map.active_gid << ")" << dendl; + ever_had_active_mgr = get_value("ever_had_active_mgr"); + + load_health(); + if (map.available) { first_seen_inactive = utime_t(); } else { @@ -79,6 +83,27 @@ void MgrMonitor::create_pending() pending_map.epoch++; } +health_status_t MgrMonitor::should_warn_about_mgr_down() +{ + utime_t now = ceph_clock_now(); + // we warn if + // - we've ever had an active mgr, or + // - we have osds AND we've exceeded the grace period + // which means a new mon cluster and be HEALTH_OK indefinitely as long as + // no OSDs are ever created. 
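  // (the grace period means a cluster that already has OSDs but was only just
  // created gets mon_mgr_mkfs_grace seconds to start a mgr before MGR_DOWN is
  // raised)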
+ if (ever_had_active_mgr || + (mon->osdmon()->osdmap.get_num_osds() > 0 && + now > mon->monmap->created + g_conf->mon_mgr_mkfs_grace)) { + health_status_t level = HEALTH_WARN; + if (first_seen_inactive != utime_t() && + now - first_seen_inactive > g_conf->mon_mgr_inactive_grace) { + level = HEALTH_ERR; + } + return level; + } + return HEALTH_OK; +} + void MgrMonitor::encode_pending(MonitorDBStore::TransactionRef t) { dout(10) << __func__ << " " << pending_map << dendl; @@ -86,6 +111,20 @@ void MgrMonitor::encode_pending(MonitorDBStore::TransactionRef t) pending_map.encode(bl, mon->get_quorum_con_features()); put_version(t, pending_map.epoch, bl); put_last_committed(t, pending_map.epoch); + + health_check_map_t next; + if (pending_map.active_gid == 0) { + auto level = should_warn_about_mgr_down(); + if (level != HEALTH_OK) { + next.add("MGR_DOWN", level, "no active mgr"); + } else { + dout(10) << __func__ << " no health warning (never active and new cluster)" + << dendl; + } + } else { + put_value(t, "ever_had_active_mgr", 1); + } + encode_health(next, t); } bool MgrMonitor::check_caps(MonOpRequestRef op, const uuid_d& fsid) @@ -314,8 +353,7 @@ void MgrMonitor::send_digests() MMgrDigest *mdigest = new MMgrDigest; JSONFormatter f; - std::list health_strs; - mon->get_health(health_strs, nullptr, &f); + mon->get_health_status(true, &f, nullptr, nullptr, nullptr); f.flush(mdigest->health_json); f.reset(); @@ -343,8 +381,9 @@ void MgrMonitor::cancel_timer() void MgrMonitor::on_active() { - if (mon->is_leader()) - mon->clog->info() << "mgrmap e" << map.epoch << ": " << map; + if (mon->is_leader()) { + mon->clog->debug() << "mgrmap e" << map.epoch << ": " << map; + } } void MgrMonitor::get_health( @@ -363,7 +402,7 @@ void MgrMonitor::get_health( return; } - if (!map.available) { + if (map.active_gid == 0) { auto level = HEALTH_WARN; // do not escalate to ERR if they are still upgrading to jewel. if (mon->osdmon()->osdmap.require_osd_release >= CEPH_RELEASE_LUMINOUS) { @@ -434,11 +473,25 @@ void MgrMonitor::tick() } } + if (!pending_map.available && + should_warn_about_mgr_down() != HEALTH_OK) { + dout(10) << " exceeded mon_mgr_mkfs_grace " << g_conf->mon_mgr_mkfs_grace + << " seconds" << dendl; + propose = true; + } + if (propose) { propose_pending(); } } +void MgrMonitor::on_restart() +{ + // Clear out the leader-specific state. 
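  // beacon timestamps recorded before the restart are stale; drop them rather
  // than carrying them forward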
+ last_beacon.clear(); +} + + bool MgrMonitor::promote_standby() { assert(pending_map.active_gid == 0); diff --git a/src/mon/MgrMonitor.h b/src/mon/MgrMonitor.h index ea1a0a91a4aca..0dc1af571deea 100644 --- a/src/mon/MgrMonitor.h +++ b/src/mon/MgrMonitor.h @@ -22,6 +22,7 @@ class MgrMonitor: public PaxosService { MgrMap map; MgrMap pending_map; + bool ever_had_active_mgr = false; utime_t first_seen_inactive; @@ -42,6 +43,8 @@ class MgrMonitor: public PaxosService bool check_caps(MonOpRequestRef op, const uuid_d& fsid); + health_status_t should_warn_about_mgr_down(); + public: MgrMonitor(Monitor *mn, Paxos *p, const string& service_name) : PaxosService(mn, p, service_name) @@ -76,6 +79,8 @@ class MgrMonitor: public PaxosService void send_digests(); void on_active() override; + void on_restart() override; + void get_health(list >& summary, list > *detail, CephContext *cct) const override; diff --git a/src/mon/MgrStatMonitor.cc b/src/mon/MgrStatMonitor.cc index add84e278b72a..6a0606a4be9fa 100644 --- a/src/mon/MgrStatMonitor.cc +++ b/src/mon/MgrStatMonitor.cc @@ -71,7 +71,7 @@ MonPGStatService *MgrStatMonitor::get_pg_stat_service() void MgrStatMonitor::create_initial() { - dout(10) << dendl; + dout(10) << __func__ << dendl; version = 0; service_map.epoch = 1; ::encode(service_map, pending_service_map_bl, CEPH_FEATURES_ALL); @@ -81,6 +81,7 @@ void MgrStatMonitor::update_from_paxos(bool *need_bootstrap) { version = get_last_committed(); dout(10) << " " << version << dendl; + load_health(); bufferlist bl; get_version(version, bl); if (version) { @@ -88,8 +89,6 @@ void MgrStatMonitor::update_from_paxos(bool *need_bootstrap) try { auto p = bl.begin(); ::decode(digest, p); - ::decode(health_summary, p); - ::decode(health_detail, p); ::decode(service_map, p); dout(10) << __func__ << " v" << version << " service_map e" << service_map.epoch << dendl; @@ -151,8 +150,7 @@ void MgrStatMonitor::create_pending() { dout(10) << " " << version << dendl; pending_digest = digest; - pending_health_summary = health_summary; - pending_health_detail = health_detail; + pending_health_checks = get_health_checks(); pending_service_map_bl.clear(); ::encode(service_map, pending_service_map_bl, mon->get_quorum_con_features()); } @@ -168,12 +166,12 @@ void MgrStatMonitor::encode_pending(MonitorDBStore::TransactionRef t) dout(10) << " " << version << dendl; bufferlist bl; ::encode(pending_digest, bl, mon->get_quorum_con_features()); - ::encode(pending_health_summary, bl); - ::encode(pending_health_detail, bl); assert(pending_service_map_bl.length()); bl.append(pending_service_map_bl); put_version(t, version, bl); put_last_committed(t, version); + + encode_health(pending_health_checks, t); } version_t MgrStatMonitor::get_trim_to() @@ -194,14 +192,6 @@ void MgrStatMonitor::get_health(list >& summary, list > *detail, CephContext *cct) const { - if (mon->osdmon()->osdmap.require_osd_release < CEPH_RELEASE_LUMINOUS) { - return; - } - - summary.insert(summary.end(), health_summary.begin(), health_summary.end()); - if (detail) { - detail->insert(detail->end(), health_detail.begin(), health_detail.end()); - } } void MgrStatMonitor::tick() @@ -254,12 +244,12 @@ bool MgrStatMonitor::prepare_report(MonOpRequestRef op) bufferlist bl = m->get_data(); auto p = bl.begin(); ::decode(pending_digest, p); - dout(10) << __func__ << " " << pending_digest << dendl; - pending_health_summary.swap(m->health_summary); - pending_health_detail.swap(m->health_detail); + pending_health_checks.swap(m->health_checks); if (m->service_map_bl.length()) { 
pending_service_map_bl.swap(m->service_map_bl); } + dout(10) << __func__ << " " << pending_digest << ", " + << pending_health_checks.checks.size() << " health checks" << dendl; return true; } diff --git a/src/mon/MgrStatMonitor.h b/src/mon/MgrStatMonitor.h index ee58e293012f5..c1846a5447798 100644 --- a/src/mon/MgrStatMonitor.h +++ b/src/mon/MgrStatMonitor.h @@ -15,14 +15,11 @@ class MgrStatMonitor : public PaxosService { // live version version_t version = 0; PGMapDigest digest; - list> health_summary; - list> health_detail; ServiceMap service_map; // pending commit PGMapDigest pending_digest; - list> pending_health_summary; - list> pending_health_detail; + health_check_map_t pending_health_checks; bufferlist pending_service_map_bl; std::unique_ptr pgservice; diff --git a/src/mon/MonCommands.h b/src/mon/MonCommands.h index 8d974660f29d0..569c52760a353 100644 --- a/src/mon/MonCommands.h +++ b/src/mon/MonCommands.h @@ -210,6 +210,7 @@ COMMAND_WITH_FLAG("injectargs " \ COMMAND("status", "show cluster status", "mon", "r", "cli,rest") COMMAND("health name=detail,type=CephChoices,strings=detail,req=false", \ "show cluster health", "mon", "r", "cli,rest") +COMMAND("time-sync-status", "show time sync status", "mon", "r", "cli,rest") COMMAND("df name=detail,type=CephChoices,strings=detail,req=false", \ "show cluster free space stats", "mon", "r", "cli,rest") COMMAND("report name=tags,type=CephString,n=N,req=false", \ diff --git a/src/mon/Monitor.cc b/src/mon/Monitor.cc index f4593f58499b3..9421b4cbf14fc 100644 --- a/src/mon/Monitor.cc +++ b/src/mon/Monitor.cc @@ -77,6 +77,7 @@ #include "MgrMonitor.h" #include "MgrStatMonitor.h" #include "mon/QuorumService.h" +#include "mon/OldHealthMonitor.h" #include "mon/HealthMonitor.h" #include "mon/ConfigKeyService.h" #include "common/config.h" @@ -204,8 +205,9 @@ Monitor::Monitor(CephContext* cct_, string nm, MonitorDBStore *s, paxos_service[PAXOS_AUTH] = new AuthMonitor(this, paxos, "auth"); paxos_service[PAXOS_MGR] = new MgrMonitor(this, paxos, "mgr"); paxos_service[PAXOS_MGRSTAT] = new MgrStatMonitor(this, paxos, "mgrstat"); + paxos_service[PAXOS_HEALTH] = new HealthMonitor(this, paxos, "health"); - health_monitor = new HealthMonitor(this); + health_monitor = new OldHealthMonitor(this); config_key_service = new ConfigKeyService(this, paxos); mon_caps = new MonCap(); @@ -2422,27 +2424,180 @@ void Monitor::do_health_to_clog(bool force) dout(10) << __func__ << (force ? " (force)" : "") << dendl; - list status; - health_status_t overall = get_health(status, NULL, NULL); + if (osdmon()->osdmap.require_osd_release >= CEPH_RELEASE_LUMINOUS) { + string summary; + health_status_t level = get_health_status(false, nullptr, &summary); + if (!force && + summary == health_status_cache.summary && + level == health_status_cache.overall) + return; + if (level == HEALTH_OK) + clog->info() << "overall " << summary; + else if (level == HEALTH_WARN) + clog->warn() << "overall " << summary; + else if (level == HEALTH_ERR) + clog->error() << "overall " << summary; + else + ceph_abort(); + health_status_cache.summary = summary; + health_status_cache.overall = level; + } else { + // for jewel only + list status; + health_status_t overall = get_health(status, NULL, NULL); + dout(25) << __func__ + << (force ? " (force)" : "") + << dendl; - dout(25) << __func__ - << (force ? 
" (force)" : "") - << dendl; + string summary = joinify(status.begin(), status.end(), string("; ")); + + if (!force && + overall == health_status_cache.overall && + !health_status_cache.summary.empty() && + health_status_cache.summary == summary) { + // we got a dup! + return; + } + + clog->info() << summary; + + health_status_cache.overall = overall; + health_status_cache.summary = summary; + } +} + +health_status_t Monitor::get_health_status( + bool want_detail, + Formatter *f, + std::string *plain, + const char *sep1, + const char *sep2) +{ + health_status_t r = HEALTH_OK; + bool compat = g_conf->mon_health_preluminous_compat; + if (f) { + f->open_object_section("health"); + f->open_object_section("checks"); + } + + string summary; + string *psummary = f ? nullptr : &summary; + for (auto& svc : paxos_service) { + r = std::min(r, svc->get_health_checks().dump_summary( + f, psummary, sep2, want_detail)); + } + + if (f) { + f->close_section(); + f->dump_stream("status") << r; + } else { + // one-liner: HEALTH_FOO[ thing1[; thing2 ...]] + *plain = stringify(r); + if (summary.size()) { + *plain += sep1; + *plain += summary; + } + *plain += "\n"; + } - string summary = joinify(status.begin(), status.end(), string("; ")); + if (f && compat) { + f->open_array_section("summary"); + for (auto& svc : paxos_service) { + svc->get_health_checks().dump_summary_compat(f); + } + f->close_section(); + f->dump_stream("overall_status") << r; + } - if (!force && - overall == health_status_cache.overall && - !health_status_cache.summary.empty() && - health_status_cache.summary == summary) { - // we got a dup! + if (want_detail) { + if (f && compat) { + f->open_array_section("detail"); + } + + for (auto& svc : paxos_service) { + svc->get_health_checks().dump_detail(f, plain, compat); + } + + if (f && compat) { + f->close_section(); + } + } + if (f) { + f->close_section(); + } + return r; +} + +void Monitor::log_health( + const health_check_map_t& updated, + const health_check_map_t& previous, + MonitorDBStore::TransactionRef t) +{ + if (!g_conf->mon_health_to_clog) { return; } + // FIXME: log atomically as part of @t instead of using clog. 
+ dout(10) << __func__ << " updated " << updated.checks.size() + << " previous " << previous.checks.size() + << dendl; + for (auto& p : updated.checks) { + auto q = previous.checks.find(p.first); + if (q == previous.checks.end()) { + // new + ostringstream ss; + ss << "Health check failed: " << p.second.summary << " (" + << p.first << ")"; + if (p.second.severity == HEALTH_WARN) + clog->warn() << ss.str(); + else + clog->error() << ss.str(); + } else { + if (p.second.summary != q->second.summary || + p.second.severity != q->second.severity) { + // summary or severity changed (ignore detail changes at this level) + ostringstream ss; + ss << "Health check update: " << p.second.summary << " (" << p.first << ")"; + if (p.second.severity == HEALTH_WARN) + clog->warn() << ss.str(); + else + clog->error() << ss.str(); + } + } + } + for (auto& p : previous.checks) { + if (!updated.checks.count(p.first)) { + // cleared + ostringstream ss; + if (p.first == "DEGRADED_OBJECTS") { + clog->info() << "All degraded objects recovered"; + } else if (p.first == "OSD_FLAGS") { + clog->info() << "OSD flags cleared"; + } else { + clog->info() << "Health check cleared: " << p.first << " (was: " + << p.second.summary << ")"; + } + } + } - clog->info() << summary; + if (previous.checks.size() && updated.checks.size() == 0) { + // We might be going into a fully healthy state, check + // other subsystems + bool any_checks = false; + for (auto& svc : paxos_service) { + if (&(svc->get_health_checks()) == &(previous)) { + // Ignore the ones we're clearing right now + continue; + } - health_status_cache.overall = overall; - health_status_cache.summary = summary; + if (svc->get_health_checks().checks.size() > 0) { + any_checks = true; + break; + } + } + if (!any_checks) { + clog->info() << "Cluster is now healthy"; + } + } } health_status_t Monitor::get_health(list& status, @@ -2462,52 +2617,29 @@ health_status_t Monitor::get_health(list& status, s->get_health(summary, detailbl ? &detail : NULL, cct); } - health_monitor->get_health(f, summary, (detailbl ? &detail : NULL)); - - if (f) { - f->open_object_section("timechecks"); - f->dump_unsigned("epoch", get_epoch()); - f->dump_int("round", timecheck_round); - f->dump_stream("round_status") - << ((timecheck_round%2) ? "on-going" : "finished"); - } + health_monitor->get_health(summary, (detailbl ? &detail : NULL)); health_status_t overall = HEALTH_OK; if (!timecheck_skews.empty()) { list warns; - if (f) - f->open_array_section("mons"); for (map::iterator i = timecheck_skews.begin(); i != timecheck_skews.end(); ++i) { entity_inst_t inst = i->first; double skew = i->second; double latency = timecheck_latencies[inst]; string name = monmap->get_name(inst.addr); - ostringstream tcss; health_status_t tcstatus = timecheck_status(tcss, skew, latency); if (tcstatus != HEALTH_OK) { if (overall > tcstatus) overall = tcstatus; warns.push_back(name); - ostringstream tmp_ss; tmp_ss << "mon." 
<< name << " addr " << inst.addr << " " << tcss.str() << " (latency " << latency << "s)"; detail.push_back(make_pair(tcstatus, tmp_ss.str())); } - - if (f) { - f->open_object_section("mon"); - f->dump_string("name", name.c_str()); - f->dump_float("skew", skew); - f->dump_float("latency", latency); - f->dump_stream("health") << tcstatus; - if (tcstatus != HEALTH_OK) - f->dump_stream("details") << tcss.str(); - f->close_section(); - } } if (!warns.empty()) { ostringstream ss; @@ -2521,11 +2653,7 @@ health_status_t Monitor::get_health(list& status, status.push_back(ss.str()); summary.push_back(make_pair(HEALTH_WARN, "Monitor clock skew detected ")); } - if (f) - f->close_section(); } - if (f) - f->close_section(); if (f) f->open_array_section("summary"); @@ -2577,12 +2705,9 @@ void Monitor::get_cluster_status(stringstream &ss, Formatter *f) if (f) f->open_object_section("status"); - // reply with the status for all the components - list health; - get_health(health, NULL, f); - if (f) { f->dump_stream("fsid") << monmap->get_fsid(); + get_health_status(false, f, nullptr); f->dump_unsigned("election_epoch", get_epoch()); { f->open_array_section("quorum"); @@ -2606,7 +2731,6 @@ void Monitor::get_cluster_status(stringstream &ss, Formatter *f) f->open_object_section("fsmap"); mdsmon()->get_fsmap().print_summary(f, NULL); f->close_section(); - f->open_object_section("mgrmap"); mgrmon()->get_map().print_summary(f, nullptr); f->close_section(); @@ -2614,11 +2738,21 @@ void Monitor::get_cluster_status(stringstream &ss, Formatter *f) f->dump_object("servicemap", mgrstatmon()->get_service_map()); f->close_section(); } else { - ss << " cluster:\n"; ss << " id: " << monmap->get_fsid() << "\n"; - ss << " health: " << joinify(health.begin(), health.end(), - string("\n ")) << "\n"; + + string health; + if (osdmon()->osdmap.require_osd_release >= CEPH_RELEASE_LUMINOUS) { + get_health_status(false, nullptr, &health, + "\n ", "\n "); + } else { + list ls; + get_health(ls, NULL, f); + health = joinify(ls.begin(), ls.end(), + string("\n ")); + } + ss << " health: " << health << "\n"; + ss << "\n \n services:\n"; { size_t maxlen = 3; @@ -3089,6 +3223,40 @@ void Monitor::handle_command(MonOpRequestRef op) rs = "must supply options to be parsed in a single string"; r = -EINVAL; } + } else if (prefix == "time-sync-status") { + if (!f) + f.reset(Formatter::create("json-pretty")); + f->open_object_section("time_sync"); + if (!timecheck_skews.empty()) { + f->open_object_section("time_skew_status"); + for (auto& i : timecheck_skews) { + entity_inst_t inst = i.first; + double skew = i.second; + double latency = timecheck_latencies[inst]; + string name = monmap->get_name(inst.addr); + ostringstream tcss; + health_status_t tcstatus = timecheck_status(tcss, skew, latency); + f->open_object_section(name.c_str()); + f->dump_float("skew", skew); + f->dump_float("latency", latency); + f->dump_stream("health") << tcstatus; + if (tcstatus != HEALTH_OK) { + f->dump_stream("details") << tcss.str(); + } + f->close_section(); + } + f->close_section(); + } + f->open_object_section("timechecks"); + f->dump_unsigned("epoch", get_epoch()); + f->dump_int("round", timecheck_round); + f->dump_stream("round_status") << ((timecheck_round%2) ? 
+ "on-going" : "finished"); + f->close_section(); + f->close_section(); + f->flush(rdata); + r = 0; + rs = ""; } else if (prefix == "status" || prefix == "health" || prefix == "df") { @@ -3105,25 +3273,35 @@ void Monitor::handle_command(MonOpRequestRef op) } rdata.append(ds); } else if (prefix == "health") { - list health_str; - get_health(health_str, detail == "detail" ? &rdata : NULL, f.get()); - if (f) { - f->flush(ds); - ds << '\n'; + if (osdmon()->osdmap.require_osd_release >= CEPH_RELEASE_LUMINOUS) { + string plain; + get_health_status(detail == "detail", f.get(), f ? nullptr : &plain); + if (f) { + f->flush(rdata); + } else { + rdata.append(plain); + } } else { - assert(!health_str.empty()); - ds << health_str.front(); - health_str.pop_front(); - if (!health_str.empty()) { - ds << ' '; - ds << joinify(health_str.begin(), health_str.end(), string("; ")); + list health_str; + get_health(health_str, detail == "detail" ? &rdata : NULL, f.get()); + if (f) { + f->flush(ds); + ds << '\n'; + } else { + assert(!health_str.empty()); + ds << health_str.front(); + health_str.pop_front(); + if (!health_str.empty()) { + ds << ' '; + ds << joinify(health_str.begin(), health_str.end(), string("; ")); + } } + bufferlist comb; + comb.append(ds); + if (detail == "detail") + comb.append(rdata); + rdata = comb; } - bufferlist comb; - comb.append(ds); - if (detail == "detail") - comb.append(rdata); - rdata = comb; } else if (prefix == "df") { bool verbose = (detail == "detail"); if (f) @@ -4112,6 +4290,11 @@ void Monitor::dispatch_op(MonOpRequestRef op) health_monitor->dispatch(op); break; + case MSG_MON_HEALTH_CHECKS: + op->set_type_service(); + paxos_service[PAXOS_HEALTH]->dispatch(op); + break; + default: dealt_with = false; break; diff --git a/src/mon/Monitor.h b/src/mon/Monitor.h index ac3f3f0b01838..fa7f9e9acdd32 100644 --- a/src/mon/Monitor.h +++ b/src/mon/Monitor.h @@ -27,10 +27,12 @@ #include #include "include/types.h" +#include "include/health.h" #include "msg/Messenger.h" #include "common/Timer.h" +#include "health_check.h" #include "MonMap.h" #include "Elector.h" #include "Paxos.h" @@ -496,6 +498,7 @@ class Monitor : public Dispatcher, version_t timecheck_round; unsigned int timecheck_acks; utime_t timecheck_round_start; + friend class HealthMonitor; /* When we hit a skew we will start a new round based off of * 'mon_timecheck_skew_interval'. 
Each new round will be backed off * until we hit 'mon_timecheck_interval' -- which is the typical @@ -648,6 +651,10 @@ class Monitor : public Dispatcher, return (class MgrStatMonitor*) paxos_service[PAXOS_MGRSTAT]; } + class MgrStatMonitor *healthmon() { + return (class MgrStatMonitor*) paxos_service[PAXOS_MGRSTAT]; + } + friend class Paxos; friend class OSDMonitor; friend class MDSMonitor; @@ -737,6 +744,18 @@ class Monitor : public Dispatcher, */ health_status_t get_health(list& status, bufferlist *detailbl, Formatter *f); + + health_status_t get_health_status( + bool want_detail, + Formatter *f, + std::string *plain, + const char *sep1 = " ", + const char *sep2 = "; "); + void log_health( + const health_check_map_t& updated, + const health_check_map_t& previous, + MonitorDBStore::TransactionRef t); + void get_cluster_status(stringstream &ss, Formatter *f); void reply_command(MonOpRequestRef op, int rc, const string &rs, version_t version); diff --git a/src/mon/OSDMonitor.cc b/src/mon/OSDMonitor.cc index 05177b1f71e6c..a39f58ce21498 100644 --- a/src/mon/OSDMonitor.cc +++ b/src/mon/OSDMonitor.cc @@ -17,6 +17,8 @@ */ #include +#include +#include #include #include "mon/OSDMonitor.h" @@ -275,6 +277,8 @@ void OSDMonitor::update_from_paxos(bool *need_bootstrap) mapping_job.reset(); } + load_health(); + /* * We will possibly have a stashed latest that *we* wrote, and we will * always be sure to have the oldest full map in the first..last range @@ -532,7 +536,7 @@ void OSDMonitor::on_active() update_logger(); if (mon->is_leader()) { - mon->clog->info() << "osdmap " << osdmap; + mon->clog->debug() << "osdmap " << osdmap; } else { list ls; take_all_failures(ls); @@ -1101,6 +1105,11 @@ void OSDMonitor::encode_pending(MonitorDBStore::TransactionRef t) ::encode(pending_creatings, creatings_bl); t->put(OSD_PG_CREATING_PREFIX, "creating", creatings_bl); } + + // health + health_check_map_t next; + tmp.check_health(&next); + encode_health(next, t); } void OSDMonitor::trim_creating_pgs(creating_pgs_t* creating_pgs, @@ -3246,7 +3255,8 @@ void OSDMonitor::tick() do_propose = true; - mon->clog->info() << "osd." << o << " out (down for " << down << ")"; + mon->clog->info() << "Marking osd." 
<< o << " out (has been down for " + << int(down.sec()) << " seconds)"; } else continue; } @@ -3365,7 +3375,7 @@ void OSDMonitor::get_health(list >& summary, osds.insert(i); } continue; - } + } if (osdmap.is_out(i)) continue; ++num_in_osds; @@ -3776,7 +3786,17 @@ void OSDMonitor::get_health(list >& summary, } } - get_pools_health(summary, detail); + for (auto it : osdmap.get_pools()) { + const pg_pool_t &pool = it.second; + if (pool.has_flag(pg_pool_t::FLAG_FULL)) { + const string& pool_name = osdmap.get_pool_name(it.first); + stringstream ss; + ss << "pool '" << pool_name << "' is full"; + summary.push_back(make_pair(HEALTH_WARN, ss.str())); + if (detail) + detail->push_back(make_pair(HEALTH_WARN, ss.str())); + } + } } } @@ -5038,90 +5058,6 @@ bool OSDMonitor::update_pools_status() return ret; } -void OSDMonitor::get_pools_health( - list >& summary, - list > *detail) const -{ - auto& pools = osdmap.get_pools(); - for (auto it = pools.begin(); it != pools.end(); ++it) { - const pool_stat_t *pstat = mon->pgservice->get_pool_stat(it->first); - if (!pstat) - continue; - const object_stat_sum_t& sum = pstat->stats.sum; - const pg_pool_t &pool = it->second; - const string& pool_name = osdmap.get_pool_name(it->first); - - if (pool.has_flag(pg_pool_t::FLAG_FULL)) { - // uncomment these asserts if/when we update the FULL flag on pg_stat update - //assert((pool.quota_max_objects > 0) || (pool.quota_max_bytes > 0)); - - stringstream ss; - ss << "pool '" << pool_name << "' is full"; - summary.push_back(make_pair(HEALTH_WARN, ss.str())); - if (detail) - detail->push_back(make_pair(HEALTH_WARN, ss.str())); - } - - float warn_threshold = (float)g_conf->mon_pool_quota_warn_threshold/100; - float crit_threshold = (float)g_conf->mon_pool_quota_crit_threshold/100; - - if (pool.quota_max_objects > 0) { - stringstream ss; - health_status_t status = HEALTH_OK; - if ((uint64_t)sum.num_objects >= pool.quota_max_objects) { - // uncomment these asserts if/when we update the FULL flag on pg_stat update - //assert(pool.has_flag(pg_pool_t::FLAG_FULL)); - } else if (crit_threshold > 0 && - sum.num_objects >= pool.quota_max_objects*crit_threshold) { - ss << "pool '" << pool_name - << "' has " << sum.num_objects << " objects" - << " (max " << pool.quota_max_objects << ")"; - status = HEALTH_ERR; - } else if (warn_threshold > 0 && - sum.num_objects >= pool.quota_max_objects*warn_threshold) { - ss << "pool '" << pool_name - << "' has " << sum.num_objects << " objects" - << " (max " << pool.quota_max_objects << ")"; - status = HEALTH_WARN; - } - if (status != HEALTH_OK) { - pair s(status, ss.str()); - summary.push_back(s); - if (detail) - detail->push_back(s); - } - } - - if (pool.quota_max_bytes > 0) { - health_status_t status = HEALTH_OK; - stringstream ss; - if ((uint64_t)sum.num_bytes >= pool.quota_max_bytes) { - // uncomment these asserts if/when we update the FULL flag on pg_stat update - //assert(pool.has_flag(pg_pool_t::FLAG_FULL)); - } else if (crit_threshold > 0 && - sum.num_bytes >= pool.quota_max_bytes*crit_threshold) { - ss << "pool '" << pool_name - << "' has " << si_t(sum.num_bytes) << " bytes" - << " (max " << si_t(pool.quota_max_bytes) << ")"; - status = HEALTH_ERR; - } else if (warn_threshold > 0 && - sum.num_bytes >= pool.quota_max_bytes*warn_threshold) { - ss << "pool '" << pool_name - << "' has " << si_t(sum.num_bytes) << " bytes" - << " (max " << si_t(pool.quota_max_bytes) << ")"; - status = HEALTH_WARN; - } - if (status != HEALTH_OK) { - pair s(status, ss.str()); - summary.push_back(s); - if (detail) 
- detail->push_back(s); - } - } - } -} - - int OSDMonitor::prepare_new_pool(MonOpRequestRef op) { op->mark_osdmon_event(__func__); @@ -8468,6 +8404,17 @@ bool OSDMonitor::prepare_command_impl(MonOpRequestRef op, pending_inc.new_xinfo[osd].old_weight = osdmap.osd_weight[osd]; } ss << "marked out osd." << osd << ". "; + std::ostringstream msg; + msg << "Client " << op->get_session()->entity_name + << " marked osd." << osd << " out"; + if (osdmap.is_up(osd)) { + msg << ", while it was still marked up"; + } else { + msg << ", after it was down for " << int(down_pending_out[osd].sec()) + << " seconds"; + } + + mon->clog->info() << msg.str(); any = true; } } else if (prefix == "osd in") { diff --git a/src/mon/OSDMonitor.h b/src/mon/OSDMonitor.h index 3a9a27f5c37fa..9a944107970b5 100644 --- a/src/mon/OSDMonitor.h +++ b/src/mon/OSDMonitor.h @@ -359,8 +359,6 @@ class OSDMonitor : public PaxosService { void update_pool_flags(int64_t pool_id, uint64_t flags); bool update_pools_status(); - void get_pools_health(list >& summary, - list > *detail) const; bool prepare_set_flag(MonOpRequestRef op, int flag); bool prepare_unset_flag(MonOpRequestRef op, int flag); diff --git a/src/mon/OldHealthMonitor.cc b/src/mon/OldHealthMonitor.cc new file mode 100644 index 0000000000000..d7264a7ee26bd --- /dev/null +++ b/src/mon/OldHealthMonitor.cc @@ -0,0 +1,107 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab +/* + * Ceph - scalable distributed file system + * + * Copyright (C) 2013 Inktank, Inc + * + * This is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License version 2.1, as published by the Free Software + * Foundation. See file COPYING. + * + */ + +#include +#include +#include + +// #include +// Because intusive_ptr clobbers our assert... +#include "include/assert.h" + +#include "mon/Monitor.h" +#include "mon/HealthService.h" +#include "mon/OldHealthMonitor.h" +#include "mon/DataHealthService.h" + +#include "messages/MMonHealth.h" +#include "common/Formatter.h" +// #include "common/config.h" + +#define dout_subsys ceph_subsys_mon +#undef dout_prefix +#define dout_prefix _prefix(_dout, mon, this) +static ostream& _prefix(std::ostream *_dout, const Monitor *mon, + const OldHealthMonitor *hmon) { + return *_dout << "mon." << mon->name << "@" << mon->rank + << "(" << mon->get_state_name() << ")." << hmon->get_name() + << "(" << hmon->get_epoch() << ") "; +} + +void OldHealthMonitor::init() +{ + dout(10) << __func__ << dendl; + assert(services.empty()); + services[HealthService::SERVICE_HEALTH_DATA] = new DataHealthService(mon); + + for (map::iterator it = services.begin(); + it != services.end(); + ++it) { + it->second->init(); + } +} + +bool OldHealthMonitor::service_dispatch(MonOpRequestRef op) +{ + assert(op->get_req()->get_type() == MSG_MON_HEALTH); + MMonHealth *hm = static_cast(op->get_req()); + int service_type = hm->get_service_type(); + if (services.count(service_type) == 0) { + dout(1) << __func__ << " service type " << service_type + << " not registered -- drop message!" 
<< dendl; + return false; + } + return services[service_type]->service_dispatch(op); +} + +void OldHealthMonitor::start_epoch() { + epoch_t epoch = get_epoch(); + for (map::iterator it = services.begin(); + it != services.end(); ++it) { + it->second->start(epoch); + } +} + +void OldHealthMonitor::finish_epoch() { + generic_dout(20) << "OldHealthMonitor::finish_epoch()" << dendl; + for (map::iterator it = services.begin(); + it != services.end(); ++it) { + assert(it->second != NULL); + it->second->finish(); + } +} + +void OldHealthMonitor::service_shutdown() +{ + dout(0) << "OldHealthMonitor::service_shutdown " + << services.size() << " services" << dendl; + for (map::iterator it = services.begin(); + it != services.end(); + ++it) { + it->second->shutdown(); + delete it->second; + } + services.clear(); +} + +void OldHealthMonitor::get_health( + list >& summary, + list > *detail) +{ + for (map::iterator it = services.begin(); + it != services.end(); + ++it) { + it->second->get_health(summary, detail); + } +} diff --git a/src/mon/OldHealthMonitor.h b/src/mon/OldHealthMonitor.h new file mode 100644 index 0000000000000..f295693611b25 --- /dev/null +++ b/src/mon/OldHealthMonitor.h @@ -0,0 +1,66 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab +/* + * Ceph - scalable distributed file system + * + * Copyright (C) 2013 Inktank, Inc + * + * This is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License version 2.1, as published by the Free Software + * Foundation. See file COPYING. + * + */ +#ifndef CEPH_MON_OLDHEALTHMONITOR_H +#define CEPH_MON_OLDHEALTHMONITOR_H + +#include "mon/QuorumService.h" + +//forward declaration +namespace ceph { class Formatter; } +class HealthService; + +class OldHealthMonitor : public QuorumService +{ + map services; + +protected: + void service_shutdown() override; + +public: + OldHealthMonitor(Monitor *m) : QuorumService(m) { } + ~OldHealthMonitor() override { + assert(services.empty()); + } + + + /** + * @defgroup OldHealthMonitor_Inherited_h Inherited abstract methods + * @{ + */ + void init() override; + void get_health(list >& summary, + list > *detail) override; + bool service_dispatch(MonOpRequestRef op) override; + + void start_epoch() override; + + void finish_epoch() override; + + void cleanup() override { } + void service_tick() override { } + + int get_type() override { + return QuorumService::SERVICE_HEALTH; + } + + string get_name() const override { + return "health"; + } + + /** + * @} // OldHealthMonitor_Inherited_h + */ +}; + +#endif diff --git a/src/mon/PGMap.cc b/src/mon/PGMap.cc index 88add444b75cd..913e035f7ef21 100644 --- a/src/mon/PGMap.cc +++ b/src/mon/PGMap.cc @@ -1,6 +1,8 @@ // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- // vim: ts=8 sw=2 smarttab +#include + #include "PGMap.h" #define dout_subsys ceph_subsys_mon @@ -2548,7 +2550,671 @@ namespace { ss << pgs_count << " unscrubbed pgs"; summary.push_back(make_pair(HEALTH_WARN, ss.str())); } + } +} + +void PGMap::get_health_checks( + CephContext *cct, + const OSDMap& osdmap, + health_check_map_t *checks) const +{ + utime_t now = ceph_clock_now(); + const unsigned max = cct->_conf->mon_health_max_detail; + const auto& pools = osdmap.get_pools(); + + checks->clear(); + + typedef enum pg_consequence_t { + UNAVAILABLE = 1, // Client IO to the pool may block + DEGRADED = 2, // Fewer than the requested number of replicas are present + DEGRADED_FULL = 3, // 
Fewer than the requested number of replicas may be present + // and insufficient resources are present to fix this + DAMAGED = 4 // The data may be missing or inconsistent on disk and + // requires repair + } pg_consequence_t; + + // For a given PG state, how should it be reported at the pool level? + class PgStateResponse { + public: + pg_consequence_t consequence; + typedef std::function< utime_t(const pg_stat_t&) > stuck_cb; + stuck_cb stuck_since; + bool invert; + + PgStateResponse(const pg_consequence_t &c, stuck_cb s) + : consequence(c), stuck_since(s), invert(false) + { + } + + PgStateResponse(const pg_consequence_t &c, stuck_cb s, bool i) + : consequence(c), stuck_since(s), invert(i) + { + } + }; + + // Record the PG state counts that contributed to a reported pool state + class PgCauses { + public: + // Map of PG_STATE_* to number of pgs in that state. + std::map states; + + // List of all PG IDs that had a state contributing + // to this health condition. + std::set pgs; + + std::map pg_messages; + }; + + // Map of PG state to how to respond to it + std::map state_to_response = { + // Immediate reports + { PG_STATE_INCONSISTENT, {DAMAGED, {}} }, + { PG_STATE_INCOMPLETE, {DEGRADED, {}} }, + { PG_STATE_REPAIR, {DAMAGED, {}} }, + { PG_STATE_SNAPTRIM_ERROR, {DAMAGED, {}} }, + { PG_STATE_BACKFILL_TOOFULL, {DEGRADED, {}} }, + { PG_STATE_RECOVERY_TOOFULL, {DEGRADED, {}} }, + { PG_STATE_DEGRADED, {DEGRADED, {}} }, + { PG_STATE_DOWN, {UNAVAILABLE, {}} }, + // Delayed (wait until stuck) reports + { PG_STATE_PEERING, {UNAVAILABLE, [](const pg_stat_t &p){return p.last_peered;} } }, + { PG_STATE_UNDERSIZED, {DEGRADED, [](const pg_stat_t &p){return p.last_fullsized;} } }, + { PG_STATE_STALE, {UNAVAILABLE, [](const pg_stat_t &p){return p.last_unstale;} } }, + // Delayed and inverted reports + { PG_STATE_ACTIVE, {UNAVAILABLE, [](const pg_stat_t &p){return p.last_active;}, true} }, + { PG_STATE_CLEAN, {DEGRADED, [](const pg_stat_t &p){return p.last_clean;}, true} } + }; + + // Specialized state printer that takes account of inversion of + // ACTIVE, CLEAN checks. + auto state_name = [](const uint32_t &state) { + // Special cases for the states that are inverted checks + if (state == PG_STATE_CLEAN) { + return std::string("unclean"); + } else if (state == PG_STATE_ACTIVE) { + return std::string("inactive"); + } else { + return pg_state_string(state); + } + }; + + // Map of what is wrong to information about why, implicitly also stores + // the list of what is wrong. 
+ std::map detected; + + // Optimisation: trim down the number of checks to apply based on + // the summary counters + std::map possible_responses; + for (const auto &i : num_pg_by_state) { + for (const auto &j : state_to_response) { + if (!j.second.invert) { + // Check for normal tests by seeing if any pgs have the flag + if (i.first & j.first) { + possible_responses.insert(j); + } + } + } + } + + for (const auto &j : state_to_response) { + if (j.second.invert) { + // Check for inverted tests by seeing if not-all pgs have the flag + const auto &found = num_pg_by_state.find(j.first); + if (found == num_pg_by_state.end() || found->second != num_pg) { + possible_responses.insert(j); + } + } + } + + utime_t cutoff = now - utime_t(cct->_conf->mon_pg_stuck_threshold, 0); + // Loop over all PGs, if there are any possibly-unhealthy states in there + if (!possible_responses.empty()) { + for (const auto& i : pg_stat) { + const auto &pg_id = i.first; + const auto &pg_info = i.second; + + for (const auto &j : state_to_response) { + const auto &pg_response_state = j.first; + const auto &pg_response = j.second; + + // Apply the state test + if (!(bool(pg_info.state & pg_response_state) != pg_response.invert)) { + continue; + } + + // Apply stuckness test if needed + if (pg_response.stuck_since) { + // Delayed response, check for stuckness + utime_t last_whatever = pg_response.stuck_since(pg_info); + if (last_whatever >= cutoff) { + // Not stuck enough, ignore. + continue; + } else { + + } + } + + auto &causes = detected[pg_response.consequence]; + causes.states[pg_response_state]++; + causes.pgs.insert(pg_id); + + // Don't bother composing detail string if we have already recorded + // too many + if (causes.pg_messages.size() > max) { + continue; + } + + std::ostringstream ss; + if (pg_response.stuck_since) { + utime_t since = pg_response.stuck_since(pg_info); + ss << "pg " << pg_id << " is stuck " << state_name(pg_response_state); + if (since == utime_t()) { + ss << " since forever"; + } else { + utime_t dur = now - since; + ss << " for " << dur; + } + ss << ", current state " << pg_state_string(pg_info.state) + << ", last acting " << pg_info.acting; + } else { + ss << "pg " << pg_id << " is " + << pg_state_string(pg_info.state); + ss << ", acting " << pg_info.acting; + if (pg_info.stats.sum.num_objects_unfound) { + ss << ", " << pg_info.stats.sum.num_objects_unfound + << " unfound"; + } + } + + if (pg_info.state & PG_STATE_INCOMPLETE) { + const pg_pool_t *pi = osdmap.get_pg_pool(pg_id.pool()); + if (pi && pi->min_size > 1) { + ss << " (reducing pool " + << osdmap.get_pool_name(pg_id.pool()) + << " min_size from " << (int)pi->min_size + << " may help; search ceph.com/docs for 'incomplete')"; + } + } + + causes.pg_messages[pg_id] = ss.str(); + } + } + } else { + dout(10) << __func__ << " skipping loop over PGs: counters look OK" << dendl; + } + + for (const auto &i : detected) { + std::string health_code; + health_status_t sev; + std::string summary; + switch(i.first) { + case UNAVAILABLE: + health_code = "PG_AVAILABILITY"; + sev = HEALTH_WARN; + summary = "Reduced data availability: "; + break; + case DEGRADED: + health_code = "PG_DEGRADED"; + summary = "Degraded data redundancy: "; + sev = HEALTH_WARN; + break; + case DEGRADED_FULL: + health_code = "PG_DEGRADED_FULL"; + summary = "Degraded data redundancy (low space): "; + sev = HEALTH_ERR; + break; + case DAMAGED: + health_code = "PG_DAMAGED"; + summary = "Possible data damage: "; + sev = HEALTH_ERR; + break; + default: + assert(false); + } + + if 
(i.first == DEGRADED) { + if (pg_sum.stats.sum.num_objects_degraded && + pg_sum.stats.sum.num_object_copies > 0) { + double pc = (double)pg_sum.stats.sum.num_objects_degraded / + (double)pg_sum.stats.sum.num_object_copies * (double)100.0; + char b[20]; + snprintf(b, sizeof(b), "%.3lf", pc); + ostringstream ss; + ss << pg_sum.stats.sum.num_objects_degraded + << "/" << pg_sum.stats.sum.num_object_copies << " objects degraded (" + << b << "%)"; + + // Throw in a comma for the benefit of the following PG counts + summary += ss.str() + ", "; + } + } + + // Compose summary message saying how many PGs in what states led + // to this health check failing + std::vector pg_msgs; + for (const auto &j : i.second.states) { + std::ostringstream msg; + msg << j.second << (j.second > 1 ? " pgs " : " pg ") << state_name(j.first); + pg_msgs.push_back(msg.str()); + } + summary += joinify(pg_msgs.begin(), pg_msgs.end(), std::string(", ")); + + + + health_check_t *check = &checks->add( + health_code, + sev, + summary); + + // Compose list of PGs contributing to this health check failing + for (const auto &j : i.second.pg_messages) { + check->detail.push_back(j.second); + } + } + + // OSD_SKEWED_USAGE + if (cct->_conf->mon_warn_osd_usage_min_max_delta) { + int max_osd = -1, min_osd = -1; + float max_osd_usage = 0.0, min_osd_usage = 1.0; + for (auto p = osd_stat.begin(); p != osd_stat.end(); ++p) { + // kb should never be 0, but avoid divide by zero in case of corruption + if (p->second.kb <= 0) + continue; + float usage = ((float)p->second.kb_used) / ((float)p->second.kb); + if (usage > max_osd_usage) { + max_osd_usage = usage; + max_osd = p->first; + } + if (usage < min_osd_usage) { + min_osd_usage = usage; + min_osd = p->first; + } + } + float diff = max_osd_usage - min_osd_usage; + if (diff > cct->_conf->mon_warn_osd_usage_min_max_delta) { + auto& d = checks->add("OSD_SKEWED_USAGE", HEALTH_WARN, + "skewed osd utilization"); + ostringstream ss; + ss << "difference between min (osd." << min_osd << " at " + << roundf(min_osd_usage*1000.0)/100.0 + << "%) and max (osd." 
<< max_osd << " at " + << roundf(max_osd_usage*1000.0)/100.0 + << "%) osd usage " << roundf(diff*1000.0)/100.0 << "% > " + << roundf(cct->_conf->mon_warn_osd_usage_min_max_delta*1000.0)/100.0 + << " (mon_warn_osd_usage_min_max_delta)"; + d.detail.push_back(ss.str()); + } + } + + // OSD_SCRUB_ERRORS + if (pg_sum.stats.sum.num_scrub_errors) { + ostringstream ss; + ss << pg_sum.stats.sum.num_scrub_errors << " scrub errors"; + checks->add("OSD_SCRUB_ERRORS", HEALTH_ERR, ss.str()); + } + + // CACHE_POOL_NEAR_FULL + { + list detail; + unsigned num_pools = 0; + for (auto& p : pools) { + if ((!p.second.target_max_objects && !p.second.target_max_bytes) || + !pg_pool_sum.count(p.first)) { + continue; + } + bool nearfull = false; + const string& name = osdmap.get_pool_name(p.first); + const pool_stat_t& st = get_pg_pool_sum_stat(p.first); + uint64_t ratio = p.second.cache_target_full_ratio_micro + + ((1000000 - p.second.cache_target_full_ratio_micro) * + cct->_conf->mon_cache_target_full_warn_ratio); + if (p.second.target_max_objects && + (uint64_t)(st.stats.sum.num_objects - + st.stats.sum.num_objects_hit_set_archive) > + p.second.target_max_objects * (ratio / 1000000.0)) { + ostringstream ss; + ss << "cache pool '" << name << "' with " + << si_t(st.stats.sum.num_objects) + << " objects at/near target max " + << si_t(p.second.target_max_objects) << " objects"; + detail.push_back(ss.str()); + nearfull = true; + } + if (p.second.target_max_bytes && + (uint64_t)(st.stats.sum.num_bytes - + st.stats.sum.num_bytes_hit_set_archive) > + p.second.target_max_bytes * (ratio / 1000000.0)) { + ostringstream ss; + ss << "cache pool '" << name + << "' with " << si_t(st.stats.sum.num_bytes) + << "B at/near target max " + << si_t(p.second.target_max_bytes) << "B"; + detail.push_back(ss.str()); + nearfull = true; + } + if (nearfull) { + ++num_pools; + } + } + if (!detail.empty()) { + ostringstream ss; + ss << num_pools << " cache pools at or near target size"; + auto& d = checks->add("CACHE_POOL_NEAR_FULL", HEALTH_WARN, ss.str()); + d.detail.swap(detail); + } + } + + // TOO_FEW_PGS + int num_in = osdmap.get_num_in_osds(); + int sum_pg_up = MAX(pg_sum.up, static_cast(pg_stat.size())); + if (num_in && + cct->_conf->mon_pg_warn_min_per_osd > 0 && + osdmap.get_pools().size() > 0) { + int per = sum_pg_up / num_in; + if (per < cct->_conf->mon_pg_warn_min_per_osd && per) { + ostringstream ss; + ss << "too few PGs per OSD (" << per + << " < min " << cct->_conf->mon_pg_warn_min_per_osd << ")"; + checks->add("TOO_FEW_PGS", HEALTH_WARN, ss.str()); + } + } + + // TOO_MANY_PGS + if (num_in && cct->_conf->mon_pg_warn_max_per_osd > 0) { + int per = sum_pg_up / num_in; + if (per > cct->_conf->mon_pg_warn_max_per_osd) { + ostringstream ss; + ss << "too many PGs per OSD (" << per + << " > max " << cct->_conf->mon_pg_warn_max_per_osd << ")"; + checks->add("TOO_MANY_PGS", HEALTH_WARN, ss.str()); + } + } + + // SMALLER_PGP_NUM + // MANY_OBJECTS_PER_PG + if (!pg_stat.empty()) { + list pgp_detail, many_detail; + for (auto p = pg_pool_sum.begin(); + p != pg_pool_sum.end(); + ++p) { + const pg_pool_t *pi = osdmap.get_pg_pool(p->first); + if (!pi) + continue; // in case osdmap changes haven't propagated to PGMap yet + const string& name = osdmap.get_pool_name(p->first); + if (pi->get_pg_num() > pi->get_pgp_num() && + !(name.find(".DELETED") != string::npos && + cct->_conf->mon_fake_pool_delete)) { + ostringstream ss; + ss << "pool " << name << " pg_num " + << pi->get_pg_num() << " > pgp_num " << pi->get_pgp_num(); + 
pgp_detail.push_back(ss.str()); + } + int average_objects_per_pg = pg_sum.stats.sum.num_objects / pg_stat.size(); + if (average_objects_per_pg > 0 && + pg_sum.stats.sum.num_objects >= cct->_conf->mon_pg_warn_min_objects && + p->second.stats.sum.num_objects >= + cct->_conf->mon_pg_warn_min_pool_objects) { + int objects_per_pg = p->second.stats.sum.num_objects / pi->get_pg_num(); + float ratio = (float)objects_per_pg / (float)average_objects_per_pg; + if (cct->_conf->mon_pg_warn_max_object_skew > 0 && + ratio > cct->_conf->mon_pg_warn_max_object_skew) { + ostringstream ss; + ss << "pool " << name << " objects per pg (" + << objects_per_pg << ") is more than " << ratio + << " times cluster average (" + << average_objects_per_pg << ")"; + many_detail.push_back(ss.str()); + } + } + } + if (!pgp_detail.empty()) { + ostringstream ss; + ss << pgp_detail.size() << " pools have pg_num > pgp_num"; + auto& d = checks->add("SMALLER_PGP_NUM", HEALTH_WARN, ss.str()); + d.detail.swap(pgp_detail); + } + if (!many_detail.empty()) { + ostringstream ss; + ss << many_detail.size() << " pools have many more objects per pg than" + << " average"; + auto& d = checks->add("MANY_OBJECTS_PER_PG", HEALTH_WARN, ss.str()); + d.detail.swap(many_detail); + } + } + + // POOL_FULL + // POOL_NEAR_FULL + { + float warn_threshold = (float)g_conf->mon_pool_quota_warn_threshold/100; + float crit_threshold = (float)g_conf->mon_pool_quota_crit_threshold/100; + list full_detail, nearfull_detail; + unsigned full_pools = 0, nearfull_pools = 0; + for (auto it : pools) { + auto it2 = pg_pool_sum.find(it.first); + if (it2 == pg_pool_sum.end()) { + continue; + } + const pool_stat_t *pstat = &it2->second; + const object_stat_sum_t& sum = pstat->stats.sum; + const string& pool_name = osdmap.get_pool_name(it.first); + const pg_pool_t &pool = it.second; + bool full = false, nearfull = false; + if (pool.quota_max_objects > 0) { + stringstream ss; + if ((uint64_t)sum.num_objects >= pool.quota_max_objects) { + } else if (crit_threshold > 0 && + sum.num_objects >= pool.quota_max_objects*crit_threshold) { + ss << "pool '" << pool_name + << "' has " << sum.num_objects << " objects" + << " (max " << pool.quota_max_objects << ")"; + full_detail.push_back(ss.str()); + full = true; + } else if (warn_threshold > 0 && + sum.num_objects >= pool.quota_max_objects*warn_threshold) { + ss << "pool '" << pool_name + << "' has " << sum.num_objects << " objects" + << " (max " << pool.quota_max_objects << ")"; + nearfull_detail.push_back(ss.str()); + nearfull = true; + } + } + if (pool.quota_max_bytes > 0) { + stringstream ss; + if ((uint64_t)sum.num_bytes >= pool.quota_max_bytes) { + } else if (crit_threshold > 0 && + sum.num_bytes >= pool.quota_max_bytes*crit_threshold) { + ss << "pool '" << pool_name + << "' has " << si_t(sum.num_bytes) << " bytes" + << " (max " << si_t(pool.quota_max_bytes) << ")"; + full_detail.push_back(ss.str()); + full = true; + } else if (warn_threshold > 0 && + sum.num_bytes >= pool.quota_max_bytes*warn_threshold) { + ss << "pool '" << pool_name + << "' has " << si_t(sum.num_bytes) << " bytes" + << " (max " << si_t(pool.quota_max_bytes) << ")"; + nearfull_detail.push_back(ss.str()); + nearfull = true; + } + } + if (full) { + ++full_pools; + } + if (nearfull) { + ++nearfull_pools; + } + } + if (full_pools) { + ostringstream ss; + ss << full_pools << " pools full"; + auto& d = checks->add("POOL_FULL", HEALTH_ERR, ss.str()); + d.detail.swap(full_detail); + } + if (nearfull_pools) { + ostringstream ss; + ss << nearfull_pools << " pools 
full"; + auto& d = checks->add("POOL_NEAR_FULL", HEALTH_WARN, ss.str()); + d.detail.swap(nearfull_detail); + } + } + // OBJECT_MISPLACED + if (pg_sum.stats.sum.num_objects_misplaced && + pg_sum.stats.sum.num_object_copies > 0) { + double pc = (double)pg_sum.stats.sum.num_objects_misplaced / + (double)pg_sum.stats.sum.num_object_copies * (double)100.0; + char b[20]; + snprintf(b, sizeof(b), "%.3lf", pc); + ostringstream ss; + ss << pg_sum.stats.sum.num_objects_misplaced + << "/" << pg_sum.stats.sum.num_object_copies << " objects misplaced (" + << b << "%)"; + checks->add("OBJECT_MISPLACED", HEALTH_WARN, ss.str()); + } + + // OBJECT_UNFOUND + if (pg_sum.stats.sum.num_objects_unfound && + pg_sum.stats.sum.num_objects) { + double pc = (double)pg_sum.stats.sum.num_objects_unfound / + (double)pg_sum.stats.sum.num_objects * (double)100.0; + char b[20]; + snprintf(b, sizeof(b), "%.3lf", pc); + ostringstream ss; + ss << pg_sum.stats.sum.num_objects_unfound + << "/" << pg_sum.stats.sum.num_objects << " unfound (" << b << "%)"; + checks->add("OBJECT_UNFOUND", HEALTH_WARN, ss.str()); + } + + // REQUEST_SLOW + // REQUEST_STUCK + if (cct->_conf->mon_osd_warn_op_age > 0 && + osd_sum.op_queue_age_hist.upper_bound() > + cct->_conf->mon_osd_warn_op_age) { + list warn_detail, error_detail; + unsigned warn = 0, error = 0; + float err_age = + cct->_conf->mon_osd_warn_op_age * cct->_conf->mon_osd_err_op_age_ratio; + const pow2_hist_t& h = osd_sum.op_queue_age_hist; + for (unsigned i = h.h.size() - 1; i > 0; --i) { + float ub = (float)(1 << i) / 1000.0; + if (ub < cct->_conf->mon_osd_warn_op_age) + break; + if (h.h[i]) { + ostringstream ss; + ss << h.h[i] << " ops are blocked > " << ub << " sec"; + if (ub > err_age) { + error += h.h[i]; + error_detail.push_back(ss.str()); + } else { + warn += h.h[i]; + warn_detail.push_back(ss.str()); + } + } + } + + map> warn_osd_by_max; // max -> osds + map> error_osd_by_max; // max -> osds + if (!warn_detail.empty() || !error_detail.empty()) { + for (auto& p : osd_stat) { + const pow2_hist_t& h = p.second.op_queue_age_hist; + for (unsigned i = h.h.size() - 1; i > 0; --i) { + float ub = (float)(1 << i) / 1000.0; + if (ub < cct->_conf->mon_osd_warn_op_age) + break; + if (h.h[i]) { + if (ub > err_age) { + error_osd_by_max[ub].insert(p.first); + } else { + warn_osd_by_max[ub].insert(p.first); + } + break; + } + } + } + } + + if (!warn_detail.empty()) { + ostringstream ss; + ss << warn << " slow requests are blocked > " + << cct->_conf->mon_osd_warn_op_age << " sec"; + auto& d = checks->add("REQUEST_SLOW", HEALTH_WARN, ss.str()); + d.detail.swap(warn_detail); + int left = max; + for (auto& p : warn_osd_by_max) { + ostringstream ss; + if (p.second.size() > 1) { + ss << "osds " << p.second; + } else { + ss << "osd." << *p.second.begin(); + } + ss << " have blocked requests > " << p.first << " sec"; + d.detail.push_back(ss.str()); + if (--left == 0) { + break; + } + } + } + if (!error_detail.empty()) { + ostringstream ss; + ss << warn << " stuck requests are blocked > " + << err_age << " sec"; + auto& d = checks->add("REQUEST_STUCK", HEALTH_ERR, ss.str()); + d.detail.swap(error_detail); + int left = max; + for (auto& p : error_osd_by_max) { + ostringstream ss; + if (p.second.size() > 1) { + ss << "osds " << p.second; + } else { + ss << "osd." 
<< *p.second.begin(); + } + ss << " have stuck requests > " << p.first << " sec"; + d.detail.push_back(ss.str()); + if (--left == 0) { + break; + } + } + } + } + + // PG_NOT_SCRUBBED + // PG_NOT_DEEP_SCRUBBED + { + list detail, deep_detail; + const double age = cct->_conf->mon_warn_not_scrubbed + + cct->_conf->mon_scrub_interval; + utime_t cutoff = now; + cutoff -= age; + const double deep_age = cct->_conf->mon_warn_not_deep_scrubbed + + cct->_conf->osd_deep_scrub_interval; + utime_t deep_cutoff = now; + deep_cutoff -= deep_age; + for (auto& p : pg_stat) { + if (p.second.last_scrub_stamp < cutoff) { + ostringstream ss; + ss << "pg " << p.first << " not scrubbed since " + << p.second.last_scrub_stamp; + detail.push_back(ss.str()); + } + if (p.second.last_deep_scrub_stamp < deep_cutoff) { + ostringstream ss; + ss << "pg " << p.first << " not deep-scrubbed since " + << p.second.last_deep_scrub_stamp; + deep_detail.push_back(ss.str()); + } + } + if (!detail.empty()) { + ostringstream ss; + ss << detail.size() << " pgs not scrubbed for " << age; + auto& d = checks->add("PG_NOT_SCRUBBED", HEALTH_WARN, ss.str()); + d.detail.swap(detail); + } + if (!deep_detail.empty()) { + ostringstream ss; + ss << deep_detail.size() << " pgs not deep-scrubbed for " << age; + auto& d = checks->add("PG_NOT_DEEP_SCRUBBED", HEALTH_WARN, ss.str()); + d.detail.swap(deep_detail); + } } } @@ -2938,6 +3604,70 @@ void PGMap::get_health( } } + for (auto it : pools) { + auto it2 = pg_pool_sum.find(it.first); + if (it2 == pg_pool_sum.end()) { + continue; + } + const pool_stat_t *pstat = &it2->second; + const object_stat_sum_t& sum = pstat->stats.sum; + const string& pool_name = osdmap.get_pool_name(it.first); + const pg_pool_t &pool = it.second; + + float warn_threshold = (float)g_conf->mon_pool_quota_warn_threshold/100; + float crit_threshold = (float)g_conf->mon_pool_quota_crit_threshold/100; + + if (pool.quota_max_objects > 0) { + stringstream ss; + health_status_t status = HEALTH_OK; + if ((uint64_t)sum.num_objects >= pool.quota_max_objects) { + } else if (crit_threshold > 0 && + sum.num_objects >= pool.quota_max_objects*crit_threshold) { + ss << "pool '" << pool_name + << "' has " << sum.num_objects << " objects" + << " (max " << pool.quota_max_objects << ")"; + status = HEALTH_ERR; + } else if (warn_threshold > 0 && + sum.num_objects >= pool.quota_max_objects*warn_threshold) { + ss << "pool '" << pool_name + << "' has " << sum.num_objects << " objects" + << " (max " << pool.quota_max_objects << ")"; + status = HEALTH_WARN; + } + if (status != HEALTH_OK) { + pair s(status, ss.str()); + summary.push_back(s); + if (detail) + detail->push_back(s); + } + } + + if (pool.quota_max_bytes > 0) { + health_status_t status = HEALTH_OK; + stringstream ss; + if ((uint64_t)sum.num_bytes >= pool.quota_max_bytes) { + } else if (crit_threshold > 0 && + sum.num_bytes >= pool.quota_max_bytes*crit_threshold) { + ss << "pool '" << pool_name + << "' has " << si_t(sum.num_bytes) << " bytes" + << " (max " << si_t(pool.quota_max_bytes) << ")"; + status = HEALTH_ERR; + } else if (warn_threshold > 0 && + sum.num_bytes >= pool.quota_max_bytes*warn_threshold) { + ss << "pool '" << pool_name + << "' has " << si_t(sum.num_bytes) << " bytes" + << " (max " << si_t(pool.quota_max_bytes) << ")"; + status = HEALTH_WARN; + } + if (status != HEALTH_OK) { + pair s(status, ss.str()); + summary.push_back(s); + if (detail) + detail->push_back(s); + } + } + } + print_unscrubbed_pgs(pg_stat, summary, detail, cct); } diff --git a/src/mon/PGMap.h b/src/mon/PGMap.h 
index 6d58e6b2546d7..3e81c7e05fa61 100644 --- a/src/mon/PGMap.h +++ b/src/mon/PGMap.h @@ -21,10 +21,12 @@ #ifndef CEPH_PGMAP_H #define CEPH_PGMAP_H +#include "include/health.h" #include "common/debug.h" #include "common/TextTable.h" #include "osd/osd_types.h" #include "include/mempool.h" +#include "mon/health_check.h" #include #include "mon/PGStatService.h" @@ -495,6 +497,11 @@ class PGMap : public PGMapDigest { list >& summary, list > *detail) const; + void get_health_checks( + CephContext *cct, + const OSDMap& osdmap, + health_check_map_t *checks) const; + static void generate_test_instances(list& o); }; WRITE_CLASS_ENCODER_FEATURES(PGMap::Incremental) diff --git a/src/mon/PaxosService.cc b/src/mon/PaxosService.cc index b133fc1a582bb..91152943b0602 100644 --- a/src/mon/PaxosService.cc +++ b/src/mon/PaxosService.cc @@ -431,3 +431,12 @@ void PaxosService::trim(MonitorDBStore::TransactionRef t, } } +void PaxosService::load_health() +{ + bufferlist bl; + mon->store->get("health", service_name, bl); + if (bl.length()) { + auto p = bl.begin(); + ::decode(health_checks, p); + } +} diff --git a/src/mon/PaxosService.h b/src/mon/PaxosService.h index ca75915841e59..da3038ff1e9f9 100644 --- a/src/mon/PaxosService.h +++ b/src/mon/PaxosService.h @@ -77,15 +77,23 @@ class PaxosService { */ bool have_pending; -protected: + /** + * health checks for this service + * + * Child must populate this during encode_pending() by calling encode_health(). + */ + health_check_map_t health_checks; +public: + const health_check_map_t& get_health_checks() { + return health_checks; + } +protected: /** * format of our state in leveldb, 0 for default */ version_t format_version; - - /** * @defgroup PaxosService_h_callbacks Callback classes * @{ @@ -428,6 +436,15 @@ class PaxosService { list > *detail, CephContext *cct) const { } + void encode_health(const health_check_map_t& next, + MonitorDBStore::TransactionRef t) { + bufferlist bl; + ::encode(next, bl); + t->put("health", service_name, bl); + mon->log_health(next, health_checks, t); + } + void load_health(); + private: /** * @defgroup PaxosService_h_store_keys Set of keys that are usually used on diff --git a/src/mon/QuorumService.h b/src/mon/QuorumService.h index b354c40a77f77..626ce659e573c 100644 --- a/src/mon/QuorumService.h +++ b/src/mon/QuorumService.h @@ -117,8 +117,7 @@ class QuorumService virtual void init() { } - virtual void get_health(Formatter *f, - list >& summary, + virtual void get_health(list >& summary, list > *detail) = 0; virtual int get_type() = 0; virtual string get_name() const = 0; diff --git a/src/mon/health_check.h b/src/mon/health_check.h new file mode 100644 index 0000000000000..e9e53836e4774 --- /dev/null +++ b/src/mon/health_check.h @@ -0,0 +1,192 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#pragma once + +#include +#include + +#include "include/health.h" +#include "common/Formatter.h" + +struct health_check_t { + health_status_t severity; + std::string summary; + std::list detail; + + DENC(health_check_t, v, p) { + DENC_START(1, 1, p); + denc(v.severity, p); + denc(v.summary, p); + denc(v.detail, p); + DENC_FINISH(p); + } + + friend bool operator==(const health_check_t& l, + const health_check_t& r) { + return l.severity == r.severity && + l.summary == r.summary && + l.detail == r.detail; + } + friend bool operator!=(const health_check_t& l, + const health_check_t& r) { + return !(l == r); + } + + void dump(Formatter *f) const { + f->dump_stream("severity") << severity; + 
f->dump_string("summary", summary); + f->open_array_section("detail"); + for (auto& p : detail) { + f->dump_string("item", p); + } + f->close_section(); + } + + static void generate_test_instances(list& ls) { + ls.push_back(new health_check_t); + ls.push_back(new health_check_t); + ls.back()->severity = HEALTH_ERR; + ls.back()->summary = "summarization"; + ls.back()->detail = {"one", "two", "three"}; + } +}; +WRITE_CLASS_DENC(health_check_t) + + +struct health_check_map_t { + map checks; + + DENC(health_check_map_t, v, p) { + DENC_START(1, 1, p); + denc(v.checks, p); + DENC_FINISH(p); + } + + void dump(Formatter *f) const { + for (auto& p : checks) { + f->dump_object(p.first.c_str(), p.second); + } + } + + static void generate_test_instances(list& ls) { + ls.push_back(new health_check_map_t); + ls.push_back(new health_check_map_t); + { + auto& d = ls.back()->add("FOO", HEALTH_WARN, "foo"); + d.detail.push_back("a"); + d.detail.push_back("b"); + } + { + auto& d = ls.back()->add("BAR", HEALTH_ERR, "bar!"); + d.detail.push_back("c"); + d.detail.push_back("d"); + } + } + + void clear() { + checks.clear(); + } + void swap(health_check_map_t& other) { + checks.swap(other.checks); + } + + health_check_t& add(const std::string& code, + health_status_t severity, + const std::string& summary) { + assert(checks.count(code) == 0); + health_check_t& r = checks[code]; + r.severity = severity; + r.summary = summary; + return r; + } + + void merge(const health_check_map_t& o) { + for (auto& p : o.checks) { + auto q = checks.find(p.first); + if (q == checks.end()) { + // new check + checks[p.first] = p.second; + } else { + // merge details, and hope the summary matches! + q->second.detail.insert( + q->second.detail.end(), + p.second.detail.begin(), + p.second.detail.end()); + } + } + } + + health_status_t dump_summary(Formatter *f, std::string *plain, + const char *sep, bool detail) const { + health_status_t r = HEALTH_OK; + for (auto& p : checks) { + if (r > p.second.severity) { + r = p.second.severity; + } + if (f) { + f->open_object_section(p.first.c_str()); + f->dump_stream("severity") << p.second.severity; + f->dump_string("message", p.second.summary); + if (detail) { + f->open_array_section("detail"); + for (auto& d : p.second.detail) { + f->dump_string("item", d); + } + f->close_section(); + } + f->close_section(); + } else { + if (!plain->empty()) { + *plain += sep; + } + *plain += p.second.summary; + } + } + return r; + } + + void dump_summary_compat(Formatter *f) const { + for (auto& p : checks) { + f->open_object_section("item"); + f->dump_stream("severity") << p.second.severity; + f->dump_string("summary", p.second.summary); + f->close_section(); + } + } + + void dump_detail(Formatter *f, std::string *plain, bool compat) const { + for (auto& p : checks) { + if (f) { + if (compat) { + // this is sloppy, but the best we can do: just dump all of the + // individual checks' details together + for (auto& d : p.second.detail) { + f->dump_string("item", d); + } + } + } else { + if (!compat) { + *plain += p.first + " " + p.second.summary + "\n"; + } + for (auto& d : p.second.detail) { + if (!compat) { + *plain += " "; + } + *plain += d; + *plain += "\n"; + } + } + } + } + + friend bool operator==(const health_check_map_t& l, + const health_check_map_t& r) { + return l.checks == r.checks; + } + friend bool operator!=(const health_check_map_t& l, + const health_check_map_t& r) { + return !(l == r); + } +}; +WRITE_CLASS_DENC(health_check_map_t) diff --git a/src/mon/mon_types.h b/src/mon/mon_types.h index 
883f4669e2b7f..a23238b7d0b9c 100644 --- a/src/mon/mon_types.h +++ b/src/mon/mon_types.h @@ -31,7 +31,8 @@ #define PAXOS_AUTH 5 #define PAXOS_MGR 6 #define PAXOS_MGRSTAT 7 -#define PAXOS_NUM 8 +#define PAXOS_HEALTH 8 +#define PAXOS_NUM 9 inline const char *get_paxos_name(int p) { switch (p) { @@ -43,6 +44,7 @@ inline const char *get_paxos_name(int p) { case PAXOS_AUTH: return "auth"; case PAXOS_MGR: return "mgr"; case PAXOS_MGRSTAT: return "mgrstat"; + case PAXOS_HEALTH: return "health"; default: ceph_abort(); return 0; } } diff --git a/src/msg/Message.cc b/src/msg/Message.cc index 4860889989fe9..9d1953d75b195 100644 --- a/src/msg/Message.cc +++ b/src/msg/Message.cc @@ -96,6 +96,7 @@ using namespace std; #include "messages/MMonGetVersion.h" #include "messages/MMonGetVersionReply.h" #include "messages/MMonHealth.h" +#include "messages/MMonHealthChecks.h" #include "messages/MMonMetadata.h" #include "messages/MDataPing.h" #include "messages/MAuth.h" @@ -783,6 +784,11 @@ Message *decode_message(CephContext *cct, int crcflags, case MSG_MON_HEALTH: m = new MMonHealth(); break; + + case MSG_MON_HEALTH_CHECKS: + m = new MMonHealthChecks(); + break; + #if defined(HAVE_XIO) case MSG_DATA_PING: m = new MDataPing(); diff --git a/src/msg/Message.h b/src/msg/Message.h index 611d691df992c..d1b63ac1f2199 100644 --- a/src/msg/Message.h +++ b/src/msg/Message.h @@ -183,6 +183,8 @@ // Special #define MSG_NOP 0x607 +#define MSG_MON_HEALTH_CHECKS 0x608 + // *** ceph-mgr <-> OSD/MDS daemons *** #define MSG_MGR_OPEN 0x700 #define MSG_MGR_CONFIGURE 0x701 diff --git a/src/osd/OSDMap.cc b/src/osd/OSDMap.cc index d9cf86338375b..2f441eb9a66ad 100644 --- a/src/osd/OSDMap.cc +++ b/src/osd/OSDMap.cc @@ -15,6 +15,8 @@ * */ +#include + #include "OSDMap.h" #include #include "common/config.h" @@ -24,6 +26,7 @@ #include "include/str_map.h" #include "common/code_environment.h" +#include "mon/health_check.h" #include "crush/CrushTreeDumper.h" #include "common/Clock.h" @@ -4261,3 +4264,362 @@ void print_osd_utilization(const OSDMap& osdmap, out << tbl << d.summary() << "\n"; } } + +void OSDMap::check_health(health_check_map_t *checks) const +{ + int num_osds = get_num_osds(); + + // OSD_DOWN + // OSD_$subtree_DOWN + // OSD_ORPHAN + if (num_osds >= 0) { + int num_in_osds = 0; + int num_down_in_osds = 0; + set osds; + set down_in_osds; + set up_in_osds; + set subtree_up; + unordered_map > subtree_type_down; + unordered_map num_osds_subtree; + int max_type = crush->get_max_type_id(); + + for (int i = 0; i < get_max_osd(); i++) { + if (!exists(i)) { + if (crush->item_exists(i)) { + osds.insert(i); + } + continue; + } + if (is_out(i)) + continue; + ++num_in_osds; + if (down_in_osds.count(i) || up_in_osds.count(i)) + continue; + if (!is_up(i)) { + down_in_osds.insert(i); + int parent_id = 0; + int current = i; + for (int type = 0; type <= max_type; type++) { + if (!crush->get_type_name(type)) + continue; + int r = crush->get_immediate_parent_id(current, &parent_id); + if (r == -ENOENT) + break; + // break early if this parent is already marked as up + if (subtree_up.count(parent_id)) + break; + type = crush->get_bucket_type(parent_id); + if (!subtree_type_is_down( + g_ceph_context, parent_id, type, + &down_in_osds, &up_in_osds, &subtree_up, &subtree_type_down)) + break; + current = parent_id; + } + } + } + + // calculate the number of down osds in each down subtree and + // store it in num_osds_subtree + for (int type = 1; type <= max_type; type++) { + if (!crush->get_type_name(type)) + continue; + for (auto j = 
subtree_type_down[type].begin(); + j != subtree_type_down[type].end(); + ++j) { + if (type == 1) { + list children; + int num = crush->get_children(*j, &children); + num_osds_subtree[*j] = num; + } else { + list children; + int num = 0; + int num_children = crush->get_children(*j, &children); + if (num_children == 0) + continue; + for (auto l = children.begin(); l != children.end(); ++l) { + if (num_osds_subtree[*l] > 0) { + num = num + num_osds_subtree[*l]; + } + } + num_osds_subtree[*j] = num; + } + } + } + num_down_in_osds = down_in_osds.size(); + assert(num_down_in_osds <= num_in_osds); + if (num_down_in_osds > 0) { + // summary of down subtree types and osds + for (int type = max_type; type > 0; type--) { + if (!crush->get_type_name(type)) + continue; + if (subtree_type_down[type].size() > 0) { + ostringstream ss; + ss << subtree_type_down[type].size() << " " + << crush->get_type_name(type); + if (subtree_type_down[type].size() > 1) { + ss << "s"; + } + int sum_down_osds = 0; + for (auto j = subtree_type_down[type].begin(); + j != subtree_type_down[type].end(); + ++j) { + sum_down_osds = sum_down_osds + num_osds_subtree[*j]; + } + ss << " (" << sum_down_osds << " osds) down"; + string err = string("OSD_") + + string(crush->get_type_name(type)) + "_DOWN"; + boost::to_upper(err); + auto& d = checks->add(err, HEALTH_WARN, ss.str()); + for (auto j = subtree_type_down[type].rbegin(); + j != subtree_type_down[type].rend(); + ++j) { + ostringstream ss; + ss << crush->get_type_name(type); + ss << " "; + ss << crush->get_item_name(*j); + // at the top level, do not print location + if (type != max_type) { + ss << " ("; + ss << crush->get_full_location_ordered_string(*j); + ss << ")"; + } + int num = num_osds_subtree[*j]; + ss << " (" << num << " osds)"; + ss << " is down"; + d.detail.push_back(ss.str()); + } + } + } + ostringstream ss; + ss << down_in_osds.size() << " osds down"; + auto& d = checks->add("OSD_DOWN", HEALTH_WARN, ss.str()); + for (auto it = down_in_osds.begin(); it != down_in_osds.end(); ++it) { + ostringstream ss; + ss << "osd." << *it << " ("; + ss << crush->get_full_location_ordered_string(*it); + ss << ") is down"; + d.detail.push_back(ss.str()); + } + } + + if (!osds.empty()) { + ostringstream ss; + ss << osds.size() << " osds exist in the crush map but not in the osdmap"; + auto& d = checks->add("OSD_ORPHAN", HEALTH_WARN, ss.str()); + for (auto osd : osds) { + ostringstream ss; + ss << "osd." << osd << " exists in crush map but not in osdmap"; + d.detail.push_back(ss.str()); + } + } + } + + // OSD_OUT_OF_ORDER_FULL + { + // An osd could configure failsafe ratio, to something different + // but for now assume it is the same here. + float fsr = g_conf->osd_failsafe_full_ratio; + if (fsr > 1.0) fsr /= 100; + float fr = get_full_ratio(); + float br = get_backfillfull_ratio(); + float nr = get_nearfull_ratio(); + + list detail; + // These checks correspond to how OSDService::check_full_status() in an OSD + // handles the improper setting of these values. 
+ if (br < nr) { + ostringstream ss; + ss << "backfillfull_ratio (" << br + << ") < nearfull_ratio (" << nr << "), increased"; + detail.push_back(ss.str()); + br = nr; + } + if (fr < br) { + ostringstream ss; + ss << "full_ratio (" << fr << ") < backfillfull_ratio (" << br + << "), increased"; + detail.push_back(ss.str()); + fr = br; + } + if (fsr < fr) { + ostringstream ss; + ss << "osd_failsafe_full_ratio (" << fsr << ") < full_ratio (" << fr + << "), increased"; + detail.push_back(ss.str()); + } + if (!detail.empty()) { + auto& d = checks->add("OSD_OUT_OF_ORDER_FULL", HEALTH_ERR, + "full ratio(s) out of order"); + d.detail.swap(detail); + } + } + + // OSD_FULL + // OSD_NEARFULL + // OSD_BACKFILLFULL + // OSD_FAILSAFE_FULL + { + set full, backfillfull, nearfull; + get_full_osd_counts(&full, &backfillfull, &nearfull); + if (full.size()) { + ostringstream ss; + ss << full.size() << " full osd(s)"; + auto& d = checks->add("OSD_FULL", HEALTH_ERR, ss.str()); + for (auto& i: full) { + ostringstream ss; + ss << "osd." << i << " is full"; + d.detail.push_back(ss.str()); + } + } + if (backfillfull.size()) { + ostringstream ss; + ss << backfillfull.size() << " backfillfull osd(s)"; + auto& d = checks->add("OSD_BACKFILLFULL", HEALTH_WARN, ss.str()); + for (auto& i: backfillfull) { + ostringstream ss; + ss << "osd." << i << " is backfill full"; + d.detail.push_back(ss.str()); + } + } + if (nearfull.size()) { + ostringstream ss; + ss << nearfull.size() << " nearfull osd(s)"; + auto& d = checks->add("OSD_NEARFULL", HEALTH_WARN, ss.str()); + for (auto& i: nearfull) { + ostringstream ss; + ss << "osd." << i << " is near full"; + d.detail.push_back(ss.str()); + } + } + } + + // OSDMAP_FLAGS + { + // warn about flags + uint64_t warn_flags = + CEPH_OSDMAP_FULL | + CEPH_OSDMAP_PAUSERD | + CEPH_OSDMAP_PAUSEWR | + CEPH_OSDMAP_PAUSEREC | + CEPH_OSDMAP_NOUP | + CEPH_OSDMAP_NODOWN | + CEPH_OSDMAP_NOIN | + CEPH_OSDMAP_NOOUT | + CEPH_OSDMAP_NOBACKFILL | + CEPH_OSDMAP_NORECOVER | + CEPH_OSDMAP_NOSCRUB | + CEPH_OSDMAP_NODEEP_SCRUB | + CEPH_OSDMAP_NOTIERAGENT | + CEPH_OSDMAP_NOREBALANCE; + if (test_flag(warn_flags)) { + ostringstream ss; + ss << get_flag_string(get_flags() & warn_flags) + << " flag(s) set"; + checks->add("OSDMAP_FLAGS", HEALTH_WARN, ss.str()); + } + } + + // OSD_FLAGS + { + list detail; + const unsigned flags = + CEPH_OSD_NOUP | + CEPH_OSD_NOIN | + CEPH_OSD_NODOWN | + CEPH_OSD_NOOUT; + for (int i = 0; i < max_osd; ++i) { + if (osd_state[i] & flags) { + ostringstream ss; + set states; + OSDMap::calc_state_set(osd_state[i] & flags, states); + ss << "osd." 
<< i << " has flags " << states; + detail.push_back(ss.str()); + } + } + if (!detail.empty()) { + ostringstream ss; + ss << detail.size() << " osd(s) have {NOUP,NODOWN,NOIN,NOOUT} flags set"; + auto& d = checks->add("OSD_FLAGS", HEALTH_WARN, ss.str()); + d.detail.swap(detail); + } + } + + // OLD_CRUSH_TUNABLES + if (g_conf->mon_warn_on_legacy_crush_tunables) { + string min = crush->get_min_required_version(); + if (min < g_conf->mon_crush_min_required_version) { + ostringstream ss; + ss << "crush map has legacy tunables (require " << min + << ", min is " << g_conf->mon_crush_min_required_version << ")"; + auto& d = checks->add("OLD_CRUSH_TUNABLES", HEALTH_WARN, ss.str()); + d.detail.push_back("see http://docs.ceph.com/docs/master/rados/operations/crush-map/#tunables"); + } + } + + // OLD_CRUSH_STRAW_CALC_VERSION + if (g_conf->mon_warn_on_crush_straw_calc_version_zero) { + if (crush->get_straw_calc_version() == 0) { + ostringstream ss; + ss << "crush map has straw_calc_version=0"; + auto& d = checks->add("OLD_CRUSH_STRAW_CALC_VERSION", HEALTH_WARN, ss.str()); + d.detail.push_back( + "see http://docs.ceph.com/docs/master/rados/operations/crush-map/#tunables"); + } + } + + // CACHE_POOL_NO_HIT_SET + if (g_conf->mon_warn_on_cache_pools_without_hit_sets) { + list detail; + for (map::const_iterator p = pools.begin(); + p != pools.end(); + ++p) { + const pg_pool_t& info = p->second; + if (info.cache_mode_requires_hit_set() && + info.hit_set_params.get_type() == HitSet::TYPE_NONE) { + ostringstream ss; + ss << "pool '" << get_pool_name(p->first) + << "' with cache_mode " << info.get_cache_mode_name() + << " needs hit_set_type to be set but it is not"; + detail.push_back(ss.str()); + } + } + if (!detail.empty()) { + ostringstream ss; + ss << detail.size() << " cache pools are missing hit_sets"; + auto& d = checks->add("CACHE_POOL_NO_HIT_SET", HEALTH_WARN, ss.str()); + d.detail.swap(detail); + } + } + + // OSD_NO_SORTBITWISE + if (!test_flag(CEPH_OSDMAP_SORTBITWISE) && + (get_up_osd_features() & + CEPH_FEATURE_OSD_BITWISE_HOBJ_SORT)) { + ostringstream ss; + ss << "no legacy OSD present but 'sortbitwise' flag is not set"; + checks->add("OSD_NO_SORTBITWISE", HEALTH_WARN, ss.str()); + } + + // OSD_UPGRADE_FINISHED + // none of these (yet) since we don't run until luminous upgrade is done. + + // POOL_FULL + { + list detail; + for (auto it : get_pools()) { + const pg_pool_t &pool = it.second; + if (pool.has_flag(pg_pool_t::FLAG_FULL)) { + const string& pool_name = get_pool_name(it.first); + stringstream ss; + ss << "pool '" << pool_name << "' is full"; + detail.push_back(ss.str()); + } + } + if (!detail.empty()) { + ostringstream ss; + ss << detail.size() << " pool(s) full"; + auto& d = checks->add("POOL_FULL", HEALTH_WARN, ss.str()); + d.detail.swap(detail); + } + } +} diff --git a/src/osd/OSDMap.h b/src/osd/OSDMap.h index b6301f1fdc3c4..6538c9e62964d 100644 --- a/src/osd/OSDMap.h +++ b/src/osd/OSDMap.h @@ -41,6 +41,7 @@ using namespace std; // forward declaration class CephContext; class CrushWrapper; +class health_check_map_t; // FIXME C++11 does not have std::equal for two differently-typed containers. 
// use this until we move to c++14 @@ -1362,6 +1363,8 @@ class OSDMap { void dump(Formatter *f) const; static void generate_test_instances(list& o); bool check_new_blacklist_entries() const { return new_blacklist_entries; } + + void check_health(health_check_map_t *checks) const; }; WRITE_CLASS_ENCODER_FEATURES(OSDMap) WRITE_CLASS_ENCODER_FEATURES(OSDMap::Incremental) diff --git a/src/osd/PG.cc b/src/osd/PG.cc index 725c5dd2d5afc..05ad63c69c121 100644 --- a/src/osd/PG.cc +++ b/src/osd/PG.cc @@ -1677,7 +1677,7 @@ void PG::activate(ObjectStore::Transaction& t, * behind. */ // backfill - osd->clog->info() << info.pgid << " starting backfill to osd." << peer + osd->clog->debug() << info.pgid << " starting backfill to osd." << peer << " from (" << pi.log_tail << "," << pi.last_update << "] " << pi.last_backfill << " to " << info.last_update; diff --git a/src/pybind/mgr/dashboard/base.html b/src/pybind/mgr/dashboard/base.html index e7256d1ba0aeb..18874fb565f06 100644 --- a/src/pybind/mgr/dashboard/base.html +++ b/src/pybind/mgr/dashboard/base.html @@ -39,7 +39,7 @@ var refresh = function() { $.get("/toplevel_data", function(data) { - _.extend(toplevel_data.health, data.health); + _.extend(toplevel_data, data); setTimeout(refresh, refresh_interval); }); }; @@ -60,6 +60,14 @@ } } + rivets.formatters.health_ok = function(status_str) { + if (status_str == "HEALTH_OK") { + return true; + } else { + return false; + } + } + var truncate = function(n, max_width) { var stringized = n.toString(); var parts = stringized.split("."); @@ -106,7 +114,7 @@ - rivets.bind($("#health"), toplevel_data.health); + rivets.bind($("#health"), toplevel_data); rivets.bind($("section.sidebar"), toplevel_data); setTimeout(refresh, refresh_interval); }); @@ -140,10 +148,11 @@ Toggle navigation -
- Health:  - - {overall_status} +
+ + + {health_status} +
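The base.html change above stops binding the nested health object in the navbar and instead binds a single top-level health_status string, coloured via the new health_ok formatter. A minimal sketch of the payload shape /toplevel_data is assumed to return after this change; only the keys mirror _toplevel_data() in module.py further below, the values are illustrative:

# Hypothetical sketch of the /toplevel_data payload the navbar now binds to.
# Keys follow _toplevel_data() in module.py; the values are made up.
toplevel_data = {
    "health_status": "HEALTH_WARN",  # shown in the navbar; the health_ok formatter
                                     # returns true only for the string "HEALTH_OK"
    "rbd_pools": [],
    "filesystems": [],
}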
diff --git a/src/pybind/mgr/dashboard/health.html b/src/pybind/mgr/dashboard/health.html index e41a1e2da5e2f..de5a794f27dc5 100644 --- a/src/pybind/mgr/dashboard/health.html +++ b/src/pybind/mgr/dashboard/health.html @@ -99,12 +99,16 @@

- Overall status: {health.overall_status} +
+ Overall status: {health.status}
    -
  • - {summary.severity}: {summary.summary} -
  • +
      +
    • + {check.type}: + {check.message} +
    • +
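The health.html template above now iterates a list of structured checks rather than the old severity/summary pairs. A hedged sketch of the health structure it binds, assuming the per-check fields produced by _health_data() in module.py below; the specific check codes and messages are illustrative only:

# Illustrative values only. 'status', 'severity' and 'message' follow the new
# structured 'ceph health' output; the 'type' key is the check code added by
# _health_data() before rendering.
health = {
    "status": "HEALTH_WARN",
    "checks": [
        {"type": "OSD_DOWN", "severity": "HEALTH_WARN",
         "message": "1 osds down"},
        {"type": "PG_DEGRADED", "severity": "HEALTH_WARN",
         "message": "Degraded data redundancy: 3 pgs degraded"},
    ],
}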
diff --git a/src/pybind/mgr/dashboard/module.py b/src/pybind/mgr/dashboard/module.py index 2576680e395de..93300135f49c2 100644 --- a/src/pybind/mgr/dashboard/module.py +++ b/src/pybind/mgr/dashboard/module.py @@ -434,8 +434,8 @@ def _toplevel_data(self): ] return { - 'health': global_instance().get_sync_object(Health).data, 'rbd_pools': rbd_pools, + 'health_status': self._health_data()['status'], 'filesystems': filesystems } @@ -635,6 +635,21 @@ def _servers(self): def servers_data(self): return self._servers() + def _health_data(self): + health = global_instance().get_sync_object(Health).data + # Transform the `checks` dict into a list for the convenience + # of rendering from javascript. + checks = [] + for k, v in health['checks'].iteritems(): + v['type'] = k + checks.append(v) + + checks = sorted(checks, cmp=lambda a, b: a['severity'] > b['severity']) + + health['checks'] = checks + + return health + def _health(self): # Fuse osdmap with pg_summary to get description of pools # including their PG states @@ -670,7 +685,7 @@ def get_rate(series): del osd_map['pg_temp'] return { - "health": global_instance().get_sync_object(Health).data, + "health": self._health_data(), "mon_status": global_instance().get_sync_object( MonStatus).data, "osd_map": osd_map,
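_health_data() above reshapes the 'checks' mapping from the Health sync object into a list the template can iterate, tagging each entry with its check code and ordering entries by severity. A rough standalone re-sketch of that reshaping, assuming the input matches the structured 'ceph health' output; the helper name and the key-based sort are ours, not the module's exact Python 2 code:

# Rough re-sketch of the _health_data() reshaping step, not the module's exact code.
def flatten_health_checks(health):
    checks = []
    for code, check in health.get("checks", {}).items():
        entry = dict(check)
        entry["type"] = code  # expose the check code (e.g. "OSD_DOWN") to the template
        checks.append(entry)
    # order by severity string so more severe entries group together for display
    checks.sort(key=lambda c: c["severity"])
    return dict(health, checks=checks)

For example, passing the parsed JSON from 'ceph health' (or the mgr's Health sync object) to flatten_health_checks() would yield the list-shaped structure that health.html consumes.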