From 2fd49a1a617fc2a5736adcd591a2c0b159372fea Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Wed, 7 Jun 2017 16:54:19 -0400 Subject: [PATCH] mgr/MgrStandby: respawn when deactivated - It is ugly to unwind all of the Mgr state so that we can reactivate later. - It is perhaps impossible to shut down the python state reliably. - Respawning provides a clean state and is reliable. This mostly just copies MDSServer::respawn(). Fixes: http://tracker.ceph.com/issues/19595 Fixes: http://tracker.ceph.com/issues/19549 Signed-off-by: Sage Weil --- src/ceph_mgr.cc | 2 +- src/mgr/MgrStandby.cc | 55 ++++++++++++++++++++++++++++++++++++------- src/mgr/MgrStandby.h | 6 ++++- 3 files changed, 52 insertions(+), 11 deletions(-) diff --git a/src/ceph_mgr.cc b/src/ceph_mgr.cc index ae5feb3751acd..051bed7094647 100644 --- a/src/ceph_mgr.cc +++ b/src/ceph_mgr.cc @@ -56,7 +56,7 @@ int main(int argc, const char **argv) global_init_chdir(g_ceph_context); common_init_finish(g_ceph_context); - MgrStandby mgr; + MgrStandby mgr(argc, argv); int rc = mgr.init(); if (rc != 0) { std::cerr << "Error in initialization: " << cpp_strerror(rc) << std::endl; diff --git a/src/mgr/MgrStandby.cc b/src/mgr/MgrStandby.cc index 3d68cfe7f7e01..12a211b120b15 100644 --- a/src/mgr/MgrStandby.cc +++ b/src/mgr/MgrStandby.cc @@ -14,6 +14,8 @@ #include #include "common/errno.h" +#include "common/signal.h" +#include "include/compat.h" #include "include/stringify.h" #include "global/global_context.h" @@ -33,7 +35,7 @@ #define dout_prefix *_dout << "mgr " << __func__ << " " -MgrStandby::MgrStandby() : +MgrStandby::MgrStandby(int argc, const char **argv) : Dispatcher(g_ceph_context), monc{g_ceph_context}, client_messenger(Messenger::create_client_messenger(g_ceph_context, "mgr")), @@ -44,7 +46,9 @@ MgrStandby::MgrStandby() : audit_clog(log_client.create_channel(CLOG_CHANNEL_AUDIT)), lock("MgrStandby::lock"), timer(g_ceph_context, lock), - active_mgr(nullptr) + active_mgr(nullptr), + orig_argc(argc), + orig_argv(argv) { 
} @@ -202,6 +206,45 @@ void MgrStandby::shutdown() client_messenger->shutdown(); } +void MgrStandby::respawn() +{ + char *new_argv[orig_argc+1]; + dout(1) << " e: '" << orig_argv[0] << "'" << dendl; + for (int i=0; i log_to_monitors; @@ -249,13 +292,7 @@ void MgrStandby::handle_mgr_map(MMgrMap* mmap) } else { if (active_mgr != nullptr) { derr << "I was active but no longer am" << dendl; - active_mgr->shutdown(); - active_mgr.reset(); - - // FIXME: reset monc connection so that our old subscriptions go away - // and we stop getting MLog and MMgrDigest messages. (We do something - // similar in Mgr::init().) - monc.reopen_session(); + respawn(); } } diff --git a/src/mgr/MgrStandby.h b/src/mgr/MgrStandby.h index 8c1c144e73ff4..79cdaa0c00dbc 100644 --- a/src/mgr/MgrStandby.h +++ b/src/mgr/MgrStandby.h @@ -55,6 +55,9 @@ class MgrStandby : public Dispatcher, std::shared_ptr active_mgr; + int orig_argc; + const char **orig_argv; + std::string state_str(); void handle_mgr_map(MMgrMap *m); @@ -62,7 +65,7 @@ class MgrStandby : public Dispatcher, void send_beacon(); public: - MgrStandby(); + MgrStandby(int argc, const char **argv); ~MgrStandby() override; bool ms_dispatch(Message *m) override; @@ -74,6 +77,7 @@ class MgrStandby : public Dispatcher, int init(); void shutdown(); + void respawn(); int main(vector args); void handle_signal(int signum); void tick();